From 501af75ce68a2dd4d5c0a561cd1c1a623e0cb39f Mon Sep 17 00:00:00 2001 From: Kaustav Goswami Date: Wed, 4 Oct 2023 17:11:05 -0700 Subject: [PATCH 01/23] ext: Updated the gem5 SST Bridge to use SST 13.0.0 This change updates the gem5 SST Bridge to use SST 13.0.0. Changes are made to replace SimpleMem class to StandardMem class as SimpleMem will be deprecated in SST 14 and above. In addition, the translator.hh is updated to translate more types of gem5 packets. A new parameter `ports` was added on SST's side when invoking the gem5 component which does not require recompiling the gem5 component whenever a new outgoing bridge is added in a gem5 config. Change-Id: I45f0013bc35d088df0aa5a71951422cabab4d7f7 Signed-off-by: Kaustav Goswami --- configs/example/sst/riscv_fs.py | 4 +- ext/sst/INSTALL.md | 44 +++++---- ext/sst/Makefile.linux | 21 +++++ ext/sst/Makefile.mac | 21 +++++ ext/sst/gem5.cc | 65 +++++++++---- ext/sst/gem5.hh | 12 ++- ext/sst/sst/arm_example.py | 61 +++++++----- ext/sst/sst/example.py | 41 ++++++--- ext/sst/sst_responder.hh | 5 +- ext/sst/sst_responder_subcomponent.cc | 93 ++++++++++--------- ext/sst/sst_responder_subcomponent.hh | 22 ++--- ext/sst/translator.hh | 128 +++++++++++++++++++------- 12 files changed, 355 insertions(+), 162 deletions(-) create mode 100644 ext/sst/Makefile.linux create mode 100644 ext/sst/Makefile.mac diff --git a/configs/example/sst/riscv_fs.py b/configs/example/sst/riscv_fs.py index 77db9e4dbe..e194460db3 100644 --- a/configs/example/sst/riscv_fs.py +++ b/configs/example/sst/riscv_fs.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 The Regents of the University of California +# Copyright (c) 2023 The Regents of the University of California # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -104,7 +104,7 @@ def createHiFivePlatform(system): system.platform.pci_host.pio = system.membus.mem_side_ports - system.platform.rtc = RiscvRTC(frequency=Frequency("100MHz")) + system.platform.rtc = RiscvRTC(frequency=Frequency("10MHz")) system.platform.clint.int_pin = system.platform.rtc.int_pin system.pma_checker = PMAChecker( diff --git a/ext/sst/INSTALL.md b/ext/sst/INSTALL.md index 91f92eb7ff..d7b9b6fd87 100644 --- a/ext/sst/INSTALL.md +++ b/ext/sst/INSTALL.md @@ -1,8 +1,8 @@ # Installing SST -The links to download SST source code are available here -[http://sst-simulator.org/SSTPages/SSTMainDownloads/]. -This guide is using the most recent SST version (11.0.0) as of September 2021. +The links to download SST source code are available at +. +This guide is using the most recent SST version (13.0.0) as of September 2023. The following guide assumes `$SST_CORE_HOME` as the location where SST will be installed. @@ -11,14 +11,14 @@ installed. ### Downloading the SST-Core Source Code ```sh -wget https://github.com/sstsimulator/sst-core/releases/download/v11.1.0_Final/sstcore-11.1.0.tar.gz -tar xf sstcore-11.1.0.tar.gz +wget https://github.com/sstsimulator/sst-core/releases/download/v13.0.0_Final/sstcore-13.0.0.tar.gz +tar xvf sstcore-13.0.0.tar.gz ``` ### Installing SST-Core ```sh -cd sstcore-11.1.0 +cd sstcore-13.0.0 ./configure --prefix=$SST_CORE_HOME --with-python=/usr/bin/python3-config \ --disable-mpi # optional, used when MPI is not available. 
make all -j$(nproc) @@ -36,14 +36,14 @@ export PATH=$SST_CORE_HOME/bin:$PATH ### Downloading the SST-Elements Source Code ```sh -wget https://github.com/sstsimulator/sst-elements/releases/download/v11.1.0_Final/sstelements-11.1.0.tar.gz -tar xf sstelements-11.1.0.tar.gz +wget https://github.com/sstsimulator/sst-elements/releases/download/v13.0.0_Final/sstelements-13.0.0.tar.gz +tar xvf sstelements-13.0.0.tar.gz ``` ### Installing SST-Elements ```sh -cd sst-elements-library-11.1.0 +cd sst-elements-library-13.0.0 ./configure --prefix=$SST_CORE_HOME --with-python=/usr/bin/python3-config \ --with-sst-core=$SST_CORE_HOME make all -j$(nproc) @@ -58,24 +58,34 @@ echo "export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:$SST_CORE_HOME/lib/pkgconfig/" >> ### Building gem5 library -At the root of gem5 folder, - +At the root of the gem5 folder, you need to compile gem5 as a library. This +varies which OS you use. If you're using Linux, then type the following: ```sh scons build/RISCV/libgem5_opt.so -j $(nproc) --without-tcmalloc --duplicate-sources ``` +In case you're using Mac, then type the following: +```sh +scons build/RISCV/libgem5_opt.dylib -j $(nproc) --without-tcmalloc --duplicate-sources +``` -**Note:** `--without-tcmalloc` is required to avoid a conflict with SST's malloc. -`--duplicate-sources` is required as the compilation of SST depends on sources to be present in the "build" directory. +**Note:** +* `--without-tcmalloc` is required to avoid a conflict with SST's malloc. +* `--duplicate-sources` is required as the compilation of SST depends on sources to be present in the "build" directory. +* The Mac version was tested on a Macbook Air with M2 processor. ### Compiling the SST integration -At the root of gem5 folder, - +Go to the SST directory in the gem5 repo. ```sh cd ext/sst -make ``` - +According to the OS that you're using, you need to rename the `Makefile.xxx` to `Makefile`. 
+```sh +cp Makefile.xxx Makefile # linux or mac +make -j4 +``` +Change `ARCH=RISCV` to `ARCH=ARM` in the `Makefile` in case you're compiling +for ARM. ### Running an example simulation See `README.md` diff --git a/ext/sst/Makefile.linux b/ext/sst/Makefile.linux new file mode 100644 index 0000000000..f44ecd46d9 --- /dev/null +++ b/ext/sst/Makefile.linux @@ -0,0 +1,21 @@ +SST_VERSION=SST-13.0.0 # Name of the .pc file in lib/pkgconfig where SST is installed +GEM5_LIB=gem5_opt +ARCH=RISCV +OFLAG=3 + +LDFLAGS=-shared -fno-common ${shell pkg-config ${SST_VERSION} --libs} -L../../build/${ARCH}/ -Wl,-rpath ../../build/${ARCH} +CXXFLAGS=-std=c++17 -g -O${OFLAG} -fPIC ${shell pkg-config ${SST_VERSION} --cflags} ${shell python3-config --includes} -I../../build/${ARCH}/ -I../../ext/pybind11/include/ -I../../build/softfloat/ -I../../ext +CPPFLAGS+=-MMD -MP +SRC=$(wildcard *.cc) + +.PHONY: clean all + +all: libgem5.so + +libgem5.so: $(SRC:%.cc=%.o) + ${CXX} ${CPPFLAGS} ${LDFLAGS} $? -o $@ -l${GEM5_LIB} + +-include $(SRC:%.cc=%.d) + +clean: + ${RM} *.[do] libgem5.so diff --git a/ext/sst/Makefile.mac b/ext/sst/Makefile.mac new file mode 100644 index 0000000000..4a67570a44 --- /dev/null +++ b/ext/sst/Makefile.mac @@ -0,0 +1,21 @@ +SST_VERSION=SST-13.0.0 # Name of the .pc file in lib/pkgconfig where SST is installed +GEM5_LIB=gem5_opt +ARCH=RISCV +OFLAG=3 + +LDFLAGS=-shared -fno-common ${shell pkg-config ${SST_VERSION} --libs} -L../../build/${ARCH}/ -Wl,-rpath ../../build/${ARCH} +CXXFLAGS=-std=c++17 -g -O${OFLAG} -fPIC ${shell pkg-config ${SST_VERSION} --cflags} ${shell python3-config --includes} -I../../build/${ARCH}/ -I../../ext/pybind11/include/ -I../../build/softfloat/ -I../../ext +CPPFLAGS+=-MMD -MP +SRC=$(wildcard *.cc) + +.PHONY: clean all + +all: libgem5.dylib + +libgem5.dylib: $(SRC:%.cc=%.o) + ${CXX} ${CPPFLAGS} ${LDFLAGS} $? 
-o $@ -l${GEM5_LIB} + +-include $(SRC:%.cc=%.d) + +clean: + ${RM} *.[do] libgem5.dylib diff --git a/ext/sst/gem5.cc b/ext/sst/gem5.cc index 7af0eed7b7..6dc305f2ed 100644 --- a/ext/sst/gem5.cc +++ b/ext/sst/gem5.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2021 The Regents of the University of California +// Copyright (c) 2023 The Regents of the University of California // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -70,7 +70,6 @@ #include #include -#include #include #include #include @@ -169,16 +168,29 @@ gem5Component::gem5Component(SST::ComponentId_t id, SST::Params& params): registerAsPrimaryComponent(); primaryComponentDoNotEndSim(); - systemPort = \ - loadUserSubComponent("system_port",0); - cachePort = \ - loadUserSubComponent("cache_port", 0); - - systemPort->setTimeConverter(timeConverter); - systemPort->setOutputStream(&(output)); - cachePort->setTimeConverter(timeConverter); - cachePort->setOutputStream(&(output)); - + // We need to add another parameter when invoking gem5 scripts from SST to + // keep a track of all the OutgoingBridges. This will allow to add or + // remove OutgoingBridges from gem5 configs without the need to recompile + // the ext/sst source everytime. + std::string ports = params.find("ports", ""); + if (ports.empty()) { + output.fatal( + CALL_INFO, -1, "Component %s must have a 'ports' parameter.\n", + getName().c_str() + ); + } + // Split the port names using the util method defined. + splitPortNames(ports); + for (int i = 0 ; i < sstPortCount ; i++) { + std::cout << sstPortNames[i] << std::endl; + sstPorts.push_back( + loadUserSubComponent(sstPortNames[i], 0) + ); + // If the name defined in the `ports` is incorrect, then the program + // will crash when calling `setTimeConverter`. 
+ sstPorts[i]->setTimeConverter(timeConverter); + sstPorts[i]->setOutputStream(&(output)); + } } gem5Component::~gem5Component() @@ -216,8 +228,9 @@ gem5Component::init(unsigned phase) // find the corresponding SimObject for each SSTResponderSubComponent gem5::Root* gem5_root = gem5::Root::root(); - systemPort->findCorrespondingSimObject(gem5_root); - cachePort->findCorrespondingSimObject(gem5_root); + for (auto &port : sstPorts) { + port->findCorrespondingSimObject(gem5_root); + } // initialize the gem5 event queue if (!(threadInitialized)) { @@ -230,17 +243,18 @@ gem5Component::init(unsigned phase) } } - - systemPort->init(phase); - cachePort->init(phase); + for (auto &port : sstPorts) { + port->init(phase); + } } void gem5Component::setup() { output.verbose(CALL_INFO, 1, 0, "Component is being setup.\n"); - systemPort->setup(); - cachePort->setup(); + for (auto &port : sstPorts) { + port->setup(); + } } void @@ -427,3 +441,16 @@ gem5Component::splitCommandArgs(std::string &cmd, std::vector &args) for (auto part: parsed_args) args.push_back(strdup(part.c_str())); } + +void +gem5Component::splitPortNames(std::string port_names) +{ + std::vector parsed_args = tokenizeString( + port_names, {'\\', ' ', '\'', '\"'} + ); + sstPortCount = 0; + for (auto part: parsed_args) { + sstPortNames.push_back(strdup(part.c_str())); + sstPortCount++; + } +} diff --git a/ext/sst/gem5.hh b/ext/sst/gem5.hh index 447c68c3b2..172c2c8e76 100644 --- a/ext/sst/gem5.hh +++ b/ext/sst/gem5.hh @@ -1,4 +1,4 @@ -// Copyright (c) 2021 The Regents of the University of California +// Copyright (c) 2023 The Regents of the University of California // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -108,15 +108,20 @@ class gem5Component: public SST::Component private: SST::Output output; - SSTResponderSubComponent* systemPort; - SSTResponderSubComponent* cachePort; uint64_t clocksProcessed; SST::TimeConverter* timeConverter; gem5::GlobalSimLoopExitEvent *simulateLimitEvent; std::vector args; + // We need a list of incoming port names so that we don't need to recompile + // everytime when we add a new OutgoingBridge from python. + std::vector sstPorts; + std::vector sstPortNames; + int sstPortCount; + void initPython(int argc, char **argv); void splitCommandArgs(std::string &cmd, std::vector &args); + void splitPortNames(std::string port_names); bool threadInitialized; @@ -139,6 +144,7 @@ class gem5Component: public SST::Component ) SST_ELI_DOCUMENT_SUBCOMPONENT_SLOTS( + // These are the generally expected ports. {"system_port", "Connection to gem5 system_port", "gem5.gem5Bridge"}, {"cache_port", "Connection to gem5 CPU", "gem5.gem5Bridge"} ) diff --git a/ext/sst/sst/arm_example.py b/ext/sst/sst/arm_example.py index cdee3ca40a..9978cc702b 100644 --- a/ext/sst/sst/arm_example.py +++ b/ext/sst/sst/arm_example.py @@ -1,16 +1,4 @@ -# Copyright (c) 2021 Arm Limited -# All rights reserved. -# -# The license below extends only to copyright in the software and shall -# not be construed as granting a license to any other intellectual -# property including but not limited to intellectual property relating -# to a hardware implementation of the functionality of the software -# licensed hereunder. You may use the software subject to the license -# terms below provided that you ensure that this notice is replicated -# unmodified and in its entirety in all distributions of the software, -# modified or unmodified, in source code or in binary form. 
-# -# Copyright (c) 2021 The Regents of the University of California +# Copyright (c) 2023 The Regents of the University of California # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -35,6 +23,18 @@ # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Copyright (c) 2021 Arm Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. import sst import sys @@ -46,9 +46,10 @@ kernel = "vmlinux_exit.arm64" cpu_clock_rate = "3GHz" -# gem5 will send requests to physical addresses of range [0x80000000, inf) to memory -# currently, we do not subtract 0x80000000 from the request's address to get the "real" address -# so, the mem_size would always be 2GiB larger than the desired memory size +# gem5 will send requests to physical addresses of range [0x80000000, inf) to +# memory currently, we do not subtract 0x80000000 from the request's address to +# get the "real" address so, the mem_size would always be 2GiB larger than the +# desired memory size memory_size_gem5 = "4GiB" memory_size_sst = "16GiB" addr_range_end = UnitAlgebra(memory_size_sst).getRoundedValue() @@ -69,9 +70,22 @@ --cpu-clock-rate {cpu_clock_rate} \ --memory-size {memory_size_gem5}" +# We keep a track of all the memory ports that we have. 
+sst_ports = { + "system_port" : "system.system_outgoing_bridge", + "cache_port" : "system.memory_outgoing_bridge" +} + +# We need a list of ports. +port_list = [] +for port in sst_ports: + port_list.append(port) + cpu_params = { "frequency": cpu_clock_rate, "cmd": gem5_command, + "ports" : " ".join(port_list), + "debug_flags" : "" } gem5_node = sst.Component("gem5_node", "gem5.gem5Component") @@ -79,16 +93,16 @@ cache_bus = sst.Component("cache_bus", "memHierarchy.Bus") cache_bus.addParams( { "bus_frequency" : cpu_clock_rate } ) - -system_port = gem5_node.setSubComponent("system_port", "gem5.gem5Bridge", 0) # for initialization +# for initialization +system_port = gem5_node.setSubComponent("system_port", "gem5.gem5Bridge", 0) system_port.addParams({ - "response_receiver_name": "system.system_outgoing_bridge", + "response_receiver_name": sst_ports["system_port"], "mem_size": memory_size_sst }) - -cache_port = gem5_node.setSubComponent("cache_port", "gem5.gem5Bridge", 0) # SST -> gem5 +# SST -> gem5 +cache_port = gem5_node.setSubComponent("cache_port", "gem5.gem5Bridge", 0) cache_port.addParams({ - "response_receiver_name": "system.memory_outgoing_bridge", + "response_receiver_name": sst_ports["cache_port"], "mem_size": memory_size_sst }) @@ -98,11 +112,12 @@ # Memory memctrl = sst.Component("memory", "memHierarchy.MemController") +# `addr_range_end` should be changed accordingly to memory_size_sst memctrl.addParams({ "debug" : "0", "clock" : "1GHz", "request_width" : "64", - "addr_range_end" : addr_range_end, # should be changed accordingly to memory_size_sst + "addr_range_end" : addr_range_end, }) memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") memory.addParams({ diff --git a/ext/sst/sst/example.py b/ext/sst/sst/example.py index 76cf8ad24e..fe2e19cb0e 100644 --- a/ext/sst/sst/example.py +++ b/ext/sst/sst/example.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 The Regents of the University of California +# Copyright (c) 2023 The Regents of the 
University of California # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -34,9 +34,10 @@ bbl = "riscv-boot-exit-nodisk" cpu_clock_rate = "3GHz" -# gem5 will send requests to physical addresses of range [0x80000000, inf) to memory -# currently, we do not subtract 0x80000000 from the request's address to get the "real" address -# so, the mem_size would always be 2GiB larger than the desired memory size +# gem5 will send requests to physical addresses of range [0x80000000, inf) to +# memory currently, we do not subtract 0x80000000 from the request's address to +# get the "real" address so, the mem_size would always be 2GiB larger than the +# desired memory size memory_size_gem5 = "4GiB" memory_size_sst = "6GiB" addr_range_end = UnitAlgebra(memory_size_sst).getRoundedValue() @@ -52,10 +53,24 @@ "L1" : "1", } +# We keep a track of all the memory ports that we have. +sst_ports = { + "system_port" : "system.system_outgoing_bridge", + "cache_port" : "system.memory_outgoing_bridge" +} + +# We need a list of ports. 
+port_list = [] +for port in sst_ports: + port_list.append(port) + cpu_params = { "frequency": cpu_clock_rate, - "cmd": " ../../configs/example/sst/riscv_fs.py --cpu-clock-rate {} --memory-size {}".format(cpu_clock_rate, memory_size_gem5), - "debug_flags": "" + "cmd": " ../../configs/example/sst/riscv_fs.py" + + f" --cpu-clock-rate {cpu_clock_rate}" + + f" --memory-size {memory_size_gem5}", + "debug_flags": "", + "ports" : " ".join(port_list) } gem5_node = sst.Component("gem5_node", "gem5.gem5Component") @@ -64,11 +79,14 @@ cache_bus = sst.Component("cache_bus", "memHierarchy.Bus") cache_bus.addParams( { "bus_frequency" : cpu_clock_rate } ) -system_port = gem5_node.setSubComponent("system_port", "gem5.gem5Bridge", 0) # for initialization -system_port.addParams({ "response_receiver_name": "system.system_outgoing_bridge"}) # tell the SubComponent the name of the corresponding SimObject +# for initialization +system_port = gem5_node.setSubComponent(port_list[0], "gem5.gem5Bridge", 0) +# tell the SubComponent the name of the corresponding SimObject +system_port.addParams({ "response_receiver_name": sst_ports["system_port"]}) -cache_port = gem5_node.setSubComponent("cache_port", "gem5.gem5Bridge", 0) # SST -> gem5 -cache_port.addParams({ "response_receiver_name": "system.memory_outgoing_bridge"}) +# SST -> gem5 +cache_port = gem5_node.setSubComponent(port_list[1], "gem5.gem5Bridge", 0) +cache_port.addParams({ "response_receiver_name": sst_ports["cache_port"]}) # L1 cache l1_cache = sst.Component("l1_cache", "memHierarchy.Cache") @@ -76,11 +94,12 @@ # Memory memctrl = sst.Component("memory", "memHierarchy.MemController") +# `addr_range_end` should be changed accordingly to memory_size_sst memctrl.addParams({ "debug" : "0", "clock" : "1GHz", "request_width" : "64", - "addr_range_end" : addr_range_end, # should be changed accordingly to memory_size_sst + "addr_range_end" : addr_range_end, }) memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") 
memory.addParams({ diff --git a/ext/sst/sst_responder.hh b/ext/sst/sst_responder.hh index a89d311064..8f9bc2976d 100644 --- a/ext/sst/sst_responder.hh +++ b/ext/sst/sst_responder.hh @@ -1,4 +1,4 @@ -// Copyright (c) 2021 The Regents of the University of California +// Copyright (c) 2023 The Regents of the University of California // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -35,9 +35,8 @@ #include #include -#include #include -#include +#include #include #include diff --git a/ext/sst/sst_responder_subcomponent.cc b/ext/sst/sst_responder_subcomponent.cc index 366f99aecf..e7ac78673f 100644 --- a/ext/sst/sst_responder_subcomponent.cc +++ b/ext/sst/sst_responder_subcomponent.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2021 The Regents of the University of California +// Copyright (c) 2023 The Regents of the University of California // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -64,13 +64,12 @@ SSTResponderSubComponent::setTimeConverter(SST::TimeConverter* tc) // SHARE_PORTS means the interface can use our port as if it were its own // INSERT_STATS means the interface will inherit our statistic // configuration (e.g., if ours are enabled, the interface’s will be too) - memoryInterface = \ - loadAnonymousSubComponent( - "memHierarchy.memInterface", "memory", 0, - SST::ComponentInfo::SHARE_PORTS | SST::ComponentInfo::INSERT_STATS, - interface_params, timeConverter, - new SST::Interfaces::SimpleMem::Handler( - this, &SSTResponderSubComponent::portEventHandler) + memoryInterface = loadAnonymousSubComponent( + "memHierarchy.standardInterface", "memory", 0, + SST::ComponentInfo::SHARE_PORTS | SST::ComponentInfo::INSERT_STATS, + interface_params, timeConverter, + new SST::Interfaces::StandardMem::Handler( + this, &SSTResponderSubComponent::portEventHandler) ); assert(memoryInterface != NULL); } @@ -91,9 +90,9 @@ SSTResponderSubComponent::setResponseReceiver( bool 
SSTResponderSubComponent::handleTimingReq( - SST::Interfaces::SimpleMem::Request* request) + SST::Interfaces::StandardMem::Request* request) { - memoryInterface->sendRequest(request); + memoryInterface->send(request); return true; } @@ -104,12 +103,10 @@ SSTResponderSubComponent::init(unsigned phase) for (auto p: responseReceiver->getInitData()) { gem5::Addr addr = p.first; std::vector data = p.second; - SST::Interfaces::SimpleMem::Request* request = \ - new SST::Interfaces::SimpleMem::Request( - SST::Interfaces::SimpleMem::Request::Command::Write, addr, - data.size(), data - ); - memoryInterface->sendInitData(request); + SST::Interfaces::StandardMem::Request* request = \ + new SST::Interfaces::StandardMem::Write( + addr, data.size(), data); + memoryInterface->sendUntimedData(request); } } memoryInterface->init(phase); @@ -132,20 +129,24 @@ SSTResponderSubComponent::findCorrespondingSimObject(gem5::Root* gem5_root) void SSTResponderSubComponent::handleSwapReqResponse( - SST::Interfaces::SimpleMem::Request* request) + SST::Interfaces::StandardMem::Request* request) { // get the data, then, // 1. send a response to gem5 with the original data // 2. 
send a write to memory with atomic op applied - SST::Interfaces::SimpleMem::Request::id_t request_id = request->id; + SST::Interfaces::StandardMem::Request::id_t request_id = request->getID(); TPacketMap::iterator it = sstRequestIdToPacketMap.find(request_id); assert(it != sstRequestIdToPacketMap.end()); - std::vector data = request->data; + std::vector data = \ + dynamic_cast(request)->data; // step 1 gem5::PacketPtr pkt = it->second; - pkt->setData(request->data.data()); + pkt->setData( + dynamic_cast( + request)->data.data() + ); pkt->makeAtomicResponse(); pkt->headerDelay = pkt->payloadDelay = 0; if (blocked() || !responseReceiver->sendTimingResp(pkt)) @@ -153,27 +154,29 @@ SSTResponderSubComponent::handleSwapReqResponse( // step 2 (*(pkt->getAtomicOp()))(data.data()); // apply the atomic op - SST::Interfaces::SimpleMem::Request::Command cmd = \ - SST::Interfaces::SimpleMem::Request::Command::Write; - SST::Interfaces::SimpleMem::Addr addr = request->addr; + // This is a Write. Need to use the Write visitor class. But the original + // request is a read response. Therefore, we need to find the address and + // the data size and then call Write. + SST::Interfaces::StandardMem::Addr addr = \ + dynamic_cast(request)->pAddr; auto data_size = data.size(); - SST::Interfaces::SimpleMem::Request* write_request = \ - new SST::Interfaces::SimpleMem::Request( - cmd, addr, data_size, data - ); - write_request->setMemFlags( - SST::Interfaces::SimpleMem::Request::Flags::F_LOCKED); - memoryInterface->sendRequest(write_request); + // Create the Write request here. + SST::Interfaces::StandardMem::Request* write_request = \ + new SST::Interfaces::StandardMem::Write(addr, data_size, data); + // F_LOCKED flag in SimpleMem was changed to ReadLock and WriteUnlock + // visitor classes. This has to be addressed in the future. The boot test + // works without using ReadLock and WriteUnlock classes. 
+ memoryInterface->send(write_request); delete request; } void SSTResponderSubComponent::portEventHandler( - SST::Interfaces::SimpleMem::Request* request) + SST::Interfaces::StandardMem::Request* request) { // Expect to handle an SST response - SST::Interfaces::SimpleMem::Request::id_t request_id = request->id; + SST::Interfaces::StandardMem::Request::id_t request_id = request->getID(); TPacketMap::iterator it = sstRequestIdToPacketMap.find(request_id); @@ -193,19 +196,27 @@ SSTResponderSubComponent::portEventHandler( Translator::inplaceSSTRequestToGem5PacketPtr(pkt, request); - if (blocked() || !(responseReceiver->sendTimingResp(pkt))) + if (blocked() || !(responseReceiver->sendTimingResp(pkt))) { responseQueue.push(pkt); - } else { // we can handle unexpected invalidates, but nothing else. - SST::Interfaces::SimpleMem::Request::Command cmd = request->cmd; - if (cmd == SST::Interfaces::SimpleMem::Request::Command::WriteResp) + } + } else { + // we can handle unexpected invalidates, but nothing else. + if (SST::Interfaces::StandardMem::Read* test = + dynamic_cast(request)) { return; - assert(cmd == SST::Interfaces::SimpleMem::Request::Command::Inv); - - // make Req/Pkt for Snoop/no response needed + } + else if (SST::Interfaces::StandardMem::WriteResp* test = + dynamic_cast( + request)) { + return; + } + // for Snoop/no response needed // presently no consideration for masterId, packet type, flags... 
gem5::RequestPtr req = std::make_shared( - request->addr, request->size, 0, 0 - ); + dynamic_cast( + request)->pAddr, + dynamic_cast( + request)->size, 0, 0); gem5::PacketPtr pkt = new gem5::Packet( req, gem5::MemCmd::InvalidateReq); diff --git a/ext/sst/sst_responder_subcomponent.hh b/ext/sst/sst_responder_subcomponent.hh index 51bc4f9318..524e53e4c1 100644 --- a/ext/sst/sst_responder_subcomponent.hh +++ b/ext/sst/sst_responder_subcomponent.hh @@ -1,4 +1,4 @@ -// Copyright (c) 2021 The Regents of the University of California +// Copyright (c) 2023 The Regents of the University of California // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -36,10 +36,8 @@ #include #include - -#include #include -#include +#include #include #include @@ -59,12 +57,12 @@ class SSTResponderSubComponent: public SST::SubComponent gem5::OutgoingRequestBridge* responseReceiver; gem5::SSTResponderInterface* sstResponder; - SST::Interfaces::SimpleMem* memoryInterface; + SST::Interfaces::StandardMem* memoryInterface; SST::TimeConverter* timeConverter; SST::Output* output; std::queue responseQueue; - std::vector initRequests; + std::vector initRequests; std::string gem5SimObjectName; std::string memSize; @@ -78,7 +76,7 @@ class SSTResponderSubComponent: public SST::SubComponent void setOutputStream(SST::Output* output_); void setResponseReceiver(gem5::OutgoingRequestBridge* gem5_bridge); - void portEventHandler(SST::Interfaces::SimpleMem::Request* request); + void portEventHandler(SST::Interfaces::StandardMem::Request* request); bool blocked(); void setup(); @@ -86,18 +84,18 @@ class SSTResponderSubComponent: public SST::SubComponent // return true if the SimObject could be found bool findCorrespondingSimObject(gem5::Root* gem5_root); - bool handleTimingReq(SST::Interfaces::SimpleMem::Request* request); + bool handleTimingReq(SST::Interfaces::StandardMem::Request* request); void handleRecvRespRetry(); void handleRecvFunctional(gem5::PacketPtr 
pkt); - void handleSwapReqResponse(SST::Interfaces::SimpleMem::Request* request); + void handleSwapReqResponse(SST::Interfaces::StandardMem::Request* request); TPacketMap sstRequestIdToPacketMap; public: // register the component to SST SST_ELI_REGISTER_SUBCOMPONENT_API(SSTResponderSubComponent); - SST_ELI_REGISTER_SUBCOMPONENT_DERIVED( + SST_ELI_REGISTER_SUBCOMPONENT( SSTResponderSubComponent, - "gem5", // SST will look for libgem5.so + "gem5", // SST will look for libgem5.so or libgem5.dylib "gem5Bridge", SST_ELI_ELEMENT_VERSION(1, 0, 0), "Initialize gem5 and link SST's ports to gem5's ports", @@ -106,7 +104,7 @@ class SSTResponderSubComponent: public SST::SubComponent SST_ELI_DOCUMENT_SUBCOMPONENT_SLOTS( {"memory", "Interface to the memory subsystem", \ - "SST::Interfaces::SimpleMem"} + "SST::Interfaces::StandardMem"} ) SST_ELI_DOCUMENT_PORTS( diff --git a/ext/sst/translator.hh b/ext/sst/translator.hh index 2d8c8b782a..236abddcd7 100644 --- a/ext/sst/translator.hh +++ b/ext/sst/translator.hh @@ -1,4 +1,4 @@ -// Copyright (c) 2021 The Regents of the University of California +// Copyright (c) 2023 The Regents of the University of California // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -27,87 +27,143 @@ #ifndef __TRANSLATOR_H__ #define __TRANSLATOR_H__ -#include +#include #include -#include #include #include #include -typedef std::unordered_map TPacketMap; namespace Translator { -inline SST::Interfaces::SimpleMem::Request* +inline SST::Interfaces::StandardMem::Request* gem5RequestToSSTRequest(gem5::PacketPtr pkt, TPacketMap& sst_request_id_to_packet_map) { - SST::Interfaces::SimpleMem::Request::Command cmd; + // Listing all the different SST Memory commands. 
+ enum sst_standard_mem_commands + { + Read, + ReadResp, + Write, + WriteResp, + FlushAddr, + FlushResp, + ReadLock, + WriteUnlock, + LoadLink, + StoreConditional, + MoveData, + CustomReq, + CustomResp, + InvNotify + + }; + // SST's standard memory class has visitor classes for all the different + // types of memory commands. Request class now does not have a command + // variable. Instead for different types of request, we now need to + // dynamically cast the class object. I'm using an extra variable to map + // the type of command for SST. + int sst_command_type = -1; + // StandardMem only has one cache flush class with an option to flush or + // flush and invalidate an address. By default, this is set to true so that + // it corresponds to gem5::MemCmd::InvalidateReq + bool flush_addr_flag = true; switch ((gem5::MemCmd::Command)pkt->cmd.toInt()) { case gem5::MemCmd::HardPFReq: case gem5::MemCmd::SoftPFReq: case gem5::MemCmd::SoftPFExReq: case gem5::MemCmd::LoadLockedReq: case gem5::MemCmd::ReadExReq: + case gem5::MemCmd::ReadCleanReq: + case gem5::MemCmd::ReadSharedReq: case gem5::MemCmd::ReadReq: case gem5::MemCmd::SwapReq: - cmd = SST::Interfaces::SimpleMem::Request::Command::Read; + sst_command_type = Read; break; case gem5::MemCmd::StoreCondReq: + case gem5::MemCmd::WritebackDirty: + case gem5::MemCmd::WritebackClean: case gem5::MemCmd::WriteReq: - cmd = SST::Interfaces::SimpleMem::Request::Command::Write; + sst_command_type = Write; break; case gem5::MemCmd::CleanInvalidReq: case gem5::MemCmd::InvalidateReq: - cmd = SST::Interfaces::SimpleMem::Request::Command::FlushLineInv; + sst_command_type = FlushAddr; break; case gem5::MemCmd::CleanSharedReq: - cmd = SST::Interfaces::SimpleMem::Request::Command::FlushLine; + sst_command_type = FlushAddr; + flush_addr_flag = false; break; default: panic("Unable to convert gem5 packet: %s\n", pkt->cmd.toString()); } - SST::Interfaces::SimpleMem::Addr addr = pkt->getAddr(); - - uint8_t* data_ptr = pkt->getPtr(); + 
SST::Interfaces::StandardMem::Addr addr = pkt->getAddr(); auto data_size = pkt->getSize(); - std::vector data = std::vector( - data_ptr, data_ptr + data_size - ); + std::vector data; + // Need to make sure that the command type is a Write to retrieve the data + // data_ptr. + if (sst_command_type == Write) { + uint8_t* data_ptr = pkt->getPtr(); + data = std::vector(data_ptr, data_ptr + data_size); - SST::Interfaces::SimpleMem::Request* request = \ - new SST::Interfaces::SimpleMem::Request( - cmd, addr, data_size, data - ); + } + // Now convert a sst StandardMem request. + SST::Interfaces::StandardMem::Request* request = nullptr; + // find the corresponding memory command type. + switch(sst_command_type) { + case Read: + request = new SST::Interfaces::StandardMem::Read(addr, data_size); + break; + case Write: + request = + new SST::Interfaces::StandardMem::Write(addr, data_size, data); + break; + case FlushAddr: { + // StandardMem::FlushAddr has a invoking variable called `depth` + // which defines the number of cache levels to invalidate. Ideally + // this has to be input from the SST config, however in + // implementation I'm hardcoding this value to 2. + int cache_depth = 2; + request = + new SST::Interfaces::StandardMem::FlushAddr( + addr, data_size, flush_addr_flag, cache_depth); + break; + } + default: + panic("Unable to translate command %d to Request class!", + sst_command_type); + } if ((gem5::MemCmd::Command)pkt->cmd.toInt() == gem5::MemCmd::LoadLockedReq || (gem5::MemCmd::Command)pkt->cmd.toInt() == gem5::MemCmd::SwapReq || pkt->req->isLockedRMW()) { - request->setMemFlags( - SST::Interfaces::SimpleMem::Request::Flags::F_LOCKED); - } else if ((gem5::MemCmd::Command)pkt->cmd.toInt() == \ + // F_LOCKED is deprecated. Therefore I'm skipping this flag for the + // StandardMem request. 
+ } else if ((gem5::MemCmd::Command)pkt->cmd.toInt() == gem5::MemCmd::StoreCondReq) { - request->setMemFlags( - SST::Interfaces::SimpleMem::Request::Flags::F_LLSC); + // F_LLSC is deprecated. Therefore I'm skipping this flag for the + // StandardMem request. } if (pkt->req->isUncacheable()) { - request->setFlags( - SST::Interfaces::SimpleMem::Request::Flags::F_NONCACHEABLE); + request->setFlag( + SST::Interfaces::StandardMem::Request::Flag::F_NONCACHEABLE); } if (pkt->needsResponse()) - sst_request_id_to_packet_map[request->id] = pkt; + sst_request_id_to_packet_map[request->getID()] = pkt; return request; } inline void inplaceSSTRequestToGem5PacketPtr(gem5::PacketPtr pkt, - SST::Interfaces::SimpleMem::Request* request) + SST::Interfaces::StandardMem::Request* request) { pkt->makeResponse(); @@ -116,8 +172,18 @@ inplaceSSTRequestToGem5PacketPtr(gem5::PacketPtr pkt, // SC interprets ExtraData == 1 as the store was successful pkt->req->setExtraData(1); } - - pkt->setData(request->data.data()); + // If there is data in the request, send it back. Only ReadResp requests + // have data associated with them. Other packets do not need to be cast. + if (!pkt->isWrite()) { + // Need to verify whether the packet is a ReadResp, otherwise the + // program will try to incorrectly cast the request object. + if (SST::Interfaces::StandardMem::ReadResp* test = + dynamic_cast(request)) { + pkt->setData(dynamic_cast( + request)->data.data() + ); + } + } // Clear out bus delay notifications pkt->headerDelay = pkt->payloadDelay = 0; From 010650d623677a9bde1e184f05169a8946f48303 Mon Sep 17 00:00:00 2001 From: Kaustav Goswami Date: Mon, 30 Oct 2023 17:00:37 -0700 Subject: [PATCH 02/23] ext,configs: adding a disaggregated memory setup This change adds necessary tools to simulate multiple gem5 nodes to simulate a disaggregated memory setup using gem5 and SST.
Change-Id: I6e1e52d4ba8df7c161b3151c9b2c02b72fc7cc31 Signed-off-by: Kaustav Goswami --- .../arm_dm_sst_board.py | 220 ++++++++ disaggregated_memory_setup/arm_sst_board.py | 220 ++++++++ disaggregated_memory_setup/dm_caches.py | 126 +++++ disaggregated_memory_setup/dm_caches_arm.py | 122 +++++ .../mi_example_dm_caches.py | 191 +++++++ disaggregated_memory_setup/numa_config.py | 126 +++++ disaggregated_memory_setup/numa_config_sst.py | 131 +++++ .../numa_config_sst_nodes.py | 172 ++++++ .../numa_config_sst_nodes_arm.py | 139 +++++ .../numa_config_w_delay.py | 136 +++++ .../numa_config_w_ruby_caches.py | 160 ++++++ disaggregated_memory_setup/numa_config_x86.py | 129 +++++ disaggregated_memory_setup/remote_memory.py | 92 ++++ disaggregated_memory_setup/riscv_dm_board.py | 454 ++++++++++++++++ disaggregated_memory_setup/riscv_sst_board.py | 514 ++++++++++++++++++ .../simulator_project.ipynb | 159 ++++++ disaggregated_memory_setup/test_board_sst.py | 234 ++++++++ disaggregated_memory_setup/traffic_gen_sst.py | 136 +++++ disaggregated_memory_setup/x86_dm_board.py | 383 +++++++++++++ ext/sst/gem5.cc | 10 +- ext/sst/gem5.hh | 10 +- ext/sst/sst/example_board.py | 144 +++++ ext/sst/sst/example_board_x86.py | 144 +++++ ext/sst/sst/example_freq.py | 139 +++++ ext/sst/sst/example_nodes.py | 221 ++++++++ ext/sst/sst/example_nodes_multi_ISA.py | 234 ++++++++ ext/sst/sst/example_nodes_w_board.py | 206 +++++++ ext/sst/sst/example_nodes_w_board_arm.py | 201 +++++++ ext/sst/sst/example_nodes_w_board_full_mem.py | 204 +++++++ ext/sst/sst/example_traffic_gen.py | 218 ++++++++ ext/sst/sst_responder.cc | 2 + ext/sst/sst_responder_subcomponent.cc | 60 ++ ext/sst/translator.hh | 6 +- src/sst/outgoing_request_bridge.cc | 41 +- src/sst/outgoing_request_bridge.hh | 22 + 35 files changed, 5690 insertions(+), 16 deletions(-) create mode 100644 disaggregated_memory_setup/arm_dm_sst_board.py create mode 100644 disaggregated_memory_setup/arm_sst_board.py create mode 100644 
disaggregated_memory_setup/dm_caches.py create mode 100644 disaggregated_memory_setup/dm_caches_arm.py create mode 100644 disaggregated_memory_setup/mi_example_dm_caches.py create mode 100644 disaggregated_memory_setup/numa_config.py create mode 100644 disaggregated_memory_setup/numa_config_sst.py create mode 100644 disaggregated_memory_setup/numa_config_sst_nodes.py create mode 100644 disaggregated_memory_setup/numa_config_sst_nodes_arm.py create mode 100644 disaggregated_memory_setup/numa_config_w_delay.py create mode 100644 disaggregated_memory_setup/numa_config_w_ruby_caches.py create mode 100644 disaggregated_memory_setup/numa_config_x86.py create mode 100644 disaggregated_memory_setup/remote_memory.py create mode 100644 disaggregated_memory_setup/riscv_dm_board.py create mode 100644 disaggregated_memory_setup/riscv_sst_board.py create mode 100644 disaggregated_memory_setup/simulator_project.ipynb create mode 100644 disaggregated_memory_setup/test_board_sst.py create mode 100644 disaggregated_memory_setup/traffic_gen_sst.py create mode 100644 disaggregated_memory_setup/x86_dm_board.py create mode 100644 ext/sst/sst/example_board.py create mode 100644 ext/sst/sst/example_board_x86.py create mode 100644 ext/sst/sst/example_freq.py create mode 100644 ext/sst/sst/example_nodes.py create mode 100644 ext/sst/sst/example_nodes_multi_ISA.py create mode 100644 ext/sst/sst/example_nodes_w_board.py create mode 100644 ext/sst/sst/example_nodes_w_board_arm.py create mode 100644 ext/sst/sst/example_nodes_w_board_full_mem.py create mode 100644 ext/sst/sst/example_traffic_gen.py diff --git a/disaggregated_memory_setup/arm_dm_sst_board.py b/disaggregated_memory_setup/arm_dm_sst_board.py new file mode 100644 index 0000000000..5ec6b28d90 --- /dev/null +++ b/disaggregated_memory_setup/arm_dm_sst_board.py @@ -0,0 +1,220 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +from m5.objects import ( + AddrRange, + VoltageDomain, + SrcClockDomain, + Terminal, + VncServer, + IOXBar, + BadAddr, + ArmSystem, +) + +from m5.objects.RealView import VExpress_GEM5_Base, VExpress_GEM5_Foundation +from m5.objects.ArmSystem import ArmRelease, ArmDefaultRelease +from m5.objects.ArmFsWorkload import ArmFsLinux + +from m5.util.fdthelper import ( + Fdt, + FdtNode, + FdtProperty, + FdtPropertyStrings, + FdtPropertyWords, + FdtState, +) + +import os +import m5 +from abc import ABCMeta +from gem5.components.boards.arm_board import ArmBoard +from gem5.components.processors.abstract_processor import AbstractProcessor +from gem5.components.memory.abstract_memory_system import AbstractMemorySystem +from gem5.components.cachehierarchies.abstract_cache_hierarchy import ( + AbstractCacheHierarchy, +) +from gem5.utils.override import overrides + +from typing import List, Sequence, Tuple + + +class ArmDMSSTBoard(ArmBoard): + __metaclass__ = ABCMeta + + def __init__( + self, + clk_freq: str, + processor: AbstractProcessor, + memory: AbstractMemorySystem, + cache_hierarchy: AbstractCacheHierarchy, + remote_memory_range: AddrRange, + platform: VExpress_GEM5_Base = VExpress_GEM5_Foundation(), + release: ArmRelease = ArmDefaultRelease(), + ) -> None: + + self._remote_memory_range = remote_memory_range + super().__init__( + clk_freq=clk_freq, + processor=processor, + memory=memory, + cache_hierarchy=cache_hierarchy, + platform=platform, + release=release, + ) + + def get_remote_memory_addr_range(self): + return self._remote_memory_range + + @overrides(ArmBoard) + def _setup_board(self) -> None: + + # This board is expected to run full-system simulation. + # Loading ArmFsLinux() from `src/arch/arm/ArmFsWorkload.py` + self.workload = ArmFsLinux() + + # We are fixing the following variable for the ArmSystem to work. The + # security extension is checked while generating the dtb file in + # realview. This board does not have security extension enabled. 
+ self._have_psci = False + + # highest_el_is_64 is set to True. True if the register width of the + # highest implemented exception level is 64 bits. + self.highest_el_is_64 = True + + # Setting up the voltage and the clock domain here for the ARM board. + # The ArmSystem/RealView expects voltage_domain to be a parameter. + # The voltage and the clock frequency are taken from the devices.py + # file from configs/example/arm. We set the clock to the same frequency + # as the user specified in the config script. + self.voltage_domain = VoltageDomain(voltage="1.0V") + self.clk_domain = SrcClockDomain( + clock=self._clk_freq, voltage_domain=self.voltage_domain + ) + + # The ARM board supports both Terminal and VncServer. + self.terminal = Terminal() + self.vncserver = VncServer() + + # Incoherent I/O Bus + self.iobus = IOXBar() + self.iobus.badaddr_responder = BadAddr() + self.iobus.default = self.iobus.badaddr_responder.pio + + # We now need to setup the dma_ports. + self._dma_ports = None + + # RealView sets up most of the on-chip and off-chip devices and GIC + # for the ARM board. These devices' information is also used to + # generate the dtb file. We then connect the I/O devices to the + # I/O bus. + self._setup_io_devices() + + # Once the realview is setup, we can continue setting up the memory + # ranges. ArmBoard's memory can only be setup once realview is + # initialized. + memory = self.get_memory() + mem_size = memory.get_size() + + # The following code is taken from configs/example/arm/devices.py. It + # sets up all the memory ranges for the board. 
+ self.mem_ranges = [] + success = False + # self.mem_ranges.append(self.get_remote_memory_addr_range()) + for mem_range in self.realview._mem_regions: + size_in_range = min(mem_size, mem_range.size()) + self.mem_ranges.append( + AddrRange(start=mem_range.start, size=size_in_range) + ) + + mem_size -= size_in_range + if mem_size == 0: + success = True + break + + if success: + memory.set_memory_range(self.mem_ranges) + else: + raise ValueError("Memory size too big for platform capabilities") + + self.mem_ranges.append(self.get_remote_memory_addr_range()) + + # The PCI Devices. PCI devices can be added via the `_add_pci_device` + # function. + self._pci_devices = [] + + @overrides(ArmSystem) + def generateDeviceTree(self, state): + # Generate a device tree root node for the system by creating the root + # node and adding the generated subnodes of all children. + # When a child needs to add multiple nodes, this is done by also + # creating a node called '/' which will then be merged with the + # root instead of appended. 
+ + def generateMemNode(numa_node_id, mem_range): + node = FdtNode(f"memory@{int(mem_range.start):x}") + node.append(FdtPropertyStrings("device_type", ["memory"])) + node.append( + FdtPropertyWords( + "reg", + state.addrCells(mem_range.start) + + state.sizeCells(mem_range.size()), + ) + ) + node.append(FdtPropertyWords("numa-node-id", [numa_node_id])) + return node + + root = FdtNode("/") + root.append(state.addrCellsProperty()) + root.append(state.sizeCellsProperty()) + + # Add memory nodes + for mem_range in self.mem_ranges: + root.append(generateMemNode(0, mem_range)) + root.append(generateMemNode(1, self.get_remote_memory_addr_range())) + + for node in self.recurseDeviceTree(state): + # Merge root nodes instead of adding them (for children + # that need to add multiple root level nodes) + if node.get_name() == root.get_name(): + root.merge(node) + else: + root.append(node) + + return root + + @overrides(ArmBoard) + def get_default_kernel_args(self) -> List[str]: + + # The default kernel string is taken from the devices.py file. + return [ + "console=ttyAMA0", + "lpj=19988480", + "norandmaps", + "root={root_value}", + "rw", + f"mem={self.get_memory().get_size()}", + ] diff --git a/disaggregated_memory_setup/arm_sst_board.py b/disaggregated_memory_setup/arm_sst_board.py new file mode 100644 index 0000000000..ae5385530b --- /dev/null +++ b/disaggregated_memory_setup/arm_sst_board.py @@ -0,0 +1,220 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +from m5.objects import ( + AddrRange, + VoltageDomain, + SrcClockDomain, + Terminal, + VncServer, + IOXBar, + BadAddr, + ArmSystem, +) + +from m5.objects.RealView import VExpress_GEM5_Base, VExpress_GEM5_Foundation +from m5.objects.ArmSystem import ArmRelease, ArmDefaultRelease +from m5.objects.ArmFsWorkload import ArmFsLinux + +from m5.util.fdthelper import ( + Fdt, + FdtNode, + FdtProperty, + FdtPropertyStrings, + FdtPropertyWords, + FdtState, +) + +import os +import m5 +from abc import ABCMeta +from gem5.components.boards.arm_board import ArmBoard +from gem5.components.processors.abstract_processor import AbstractProcessor +from gem5.components.memory.abstract_memory_system import AbstractMemorySystem +from gem5.components.cachehierarchies.abstract_cache_hierarchy import ( + AbstractCacheHierarchy, +) +from gem5.utils.override import overrides + +from typing import List, Sequence, Tuple + + +class ArmDMSSTBoard(ArmBoard): + __metaclass__ = ABCMeta + + def __init__( + self, + clk_freq: str, + processor: AbstractProcessor, + memory: AbstractMemorySystem, + cache_hierarchy: AbstractCacheHierarchy, + remote_memory_range: AddrRange, + platform: VExpress_GEM5_Base = VExpress_GEM5_Foundation(), + release: ArmRelease = ArmDefaultRelease(), + ) -> None: + + self._remote_memory_range = remote_memory_range + super().__init__( + clk_freq=clk_freq, + processor=processor, + memory=memory, + cache_hierarchy=cache_hierarchy, + platform=platform, + release=release, + ) + + def get_remote_memory_addr_range(self): + return self._remote_memory_range + + @overrides(ArmBoard) + def _setup_board(self) -> None: + + # This board is expected to run full-system simulation. + # Loading ArmFsLinux() from `src/arch/arm/ArmFsWorkload.py` + self.workload = ArmFsLinux() + + # We are fixing the following variable for the ArmSystem to work. The + # security extension is checked while generating the dtb file in + # realview. This board does not have security extension enabled. 
+ self._have_psci = False + + # highest_el_is_64 is set to True. True if the register width of the + # highest implemented exception level is 64 bits. + self.highest_el_is_64 = True + + # Setting up the voltage and the clock domain here for the ARM board. + # The ArmSystem/RealView expects voltage_domain to be a parameter. + # The voltage and the clock frequency are taken from the devices.py + # file from configs/example/arm. We set the clock to the same frequency + # as the user specified in the config script. + self.voltage_domain = VoltageDomain(voltage="1.0V") + self.clk_domain = SrcClockDomain( + clock=self._clk_freq, voltage_domain=self.voltage_domain + ) + + # The ARM board supports both Terminal and VncServer. + self.terminal = Terminal() + self.vncserver = VncServer() + + # Incoherent I/O Bus + self.iobus = IOXBar() + self.iobus.badaddr_responder = BadAddr() + self.iobus.default = self.iobus.badaddr_responder.pio + + # We now need to setup the dma_ports. + self._dma_ports = None + + # RealView sets up most of the on-chip and off-chip devices and GIC + # for the ARM board. These devices' information is also used to + # generate the dtb file. We then connect the I/O devices to the + # I/O bus. + self._setup_io_devices() + + # Once the realview is setup, we can continue setting up the memory + # ranges. ArmBoard's memory can only be setup once realview is + # initialized. + memory = self.get_memory() + mem_size = memory.get_size() + + # The following code is taken from configs/example/arm/devices.py. It + # sets up all the memory ranges for the board. 
+ self.mem_ranges = [] + success = False + # self.mem_ranges.append(self.get_remote_memory_addr_range()) + for mem_range in self.realview._mem_regions: + size_in_range = min(mem_size, mem_range.size()) + self.mem_ranges.append( + AddrRange(start=mem_range.start, size=size_in_range) + ) + + mem_size -= size_in_range + if mem_size == 0: + success = True + break + + if success: + memory.set_memory_range(self.mem_ranges) + else: + raise ValueError("Memory size too big for platform capabilities") + + self.mem_ranges.append(self.get_remote_memory_addr_range()) + + # The PCI Devices. PCI devices can be added via the `_add_pci_device` + # function. + self._pci_devices = [] + + @overrides(ArmSystem) + def generateDeviceTree(self, state): + # Generate a device tree root node for the system by creating the root + # node and adding the generated subnodes of all children. + # When a child needs to add multiple nodes, this is done by also + # creating a node called '/' which will then be merged with the + # root instead of appended. 
+ + def generateMemNode(numa_node_id, mem_range): + node = FdtNode(f"memory@{int(mem_range.start):x}") + node.append(FdtPropertyStrings("device_type", ["memory"])) + node.append( + FdtPropertyWords( + "reg", + state.addrCells(mem_range.start) + + state.sizeCells(mem_range.size()), + ) + ) + node.append(FdtPropertyWords("numa-node-id", [numa_node_id])) + return node + + root = FdtNode("/") + root.append(state.addrCellsProperty()) + root.append(state.sizeCellsProperty()) + + # Add memory nodes + for mem_range in self.mem_ranges: + root.append(generateMemNode(0, mem_range)) + root.append(generateMemNode(1, self.get_remote_memory_addr_range())) + + for node in self.recurseDeviceTree(state): + # Merge root nodes instead of adding them (for children + # that need to add multiple root level nodes) + if node.get_name() == root.get_name(): + root.merge(node) + else: + root.append(node) + + return root + + @overrides(ArmBoard) + def get_default_kernel_args(self) -> List[str]: + + # The default kernel string is taken from the devices.py file. + return [ + "console=ttyAMA0", + "lpj=19988480", + "norandmaps", + "root={root_value}", + "rw", + # f"mem={self.get_memory().get_size()}", + ] diff --git a/disaggregated_memory_setup/dm_caches.py b/disaggregated_memory_setup/dm_caches.py new file mode 100644 index 0000000000..f69cd30d0a --- /dev/null +++ b/disaggregated_memory_setup/dm_caches.py @@ -0,0 +1,126 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +from gem5.components.cachehierarchies.classic.private_l1_private_l2_cache_hierarchy import ( + PrivateL1PrivateL2CacheHierarchy, +) +from gem5.components.cachehierarchies.classic.caches.l1dcache import L1DCache +from gem5.components.cachehierarchies.classic.caches.l1icache import L1ICache +from gem5.components.cachehierarchies.classic.caches.l2cache import L2Cache +from gem5.components.cachehierarchies.classic.caches.mmu_cache import MMUCache +from gem5.components.boards.abstract_board import AbstractBoard +from gem5.isas import ISA +from m5.objects import Cache, L2XBar, BaseXBar, SystemXBar, BadAddr, Port + +from gem5.utils.override import overrides + + +class ClassicPL1PL2DMCache(PrivateL1PrivateL2CacheHierarchy): + def __init__( + self, + l1d_size: str, + l1i_size: str, + l2_size: str, + ) -> None: + """ + :param l1d_size: The size of the L1 Data Cache (e.g., "32kB"). + :type l1d_size: str + :param l1i_size: The size of the L1 Instruction Cache (e.g., "32kB"). + :type l1i_size: str + :param l2_size: The size of the L2 Cache (e.g., "256kB"). + :type l2_size: str + :param membus: The memory bus. This is an optional parameter and + will default to a 64 bit width SystemXBar if not specified. + :type membus: BaseXBar + """ + super().__init__(l1i_size, l1d_size, l2_size) + + @overrides(PrivateL1PrivateL2CacheHierarchy) + def incorporate_cache(self, board: AbstractBoard) -> None: + + # Set up the system port for functional access from the simulator.
+ board.connect_system_port(self.membus.cpu_side_ports) + + for cntr in board.get_local_memory().get_memory_controllers(): + cntr.port = self.membus.mem_side_ports + + # comment these lines for SST caches + # for cntr in board.get_remote_memory().get_memory_controllers(): + # cntr.port = self.membus.mem_side_ports + + self.l1icaches = [ + L1ICache(size=self._l1i_size) + for i in range(board.get_processor().get_num_cores()) + ] + self.l1dcaches = [ + L1DCache(size=self._l1d_size) + for i in range(board.get_processor().get_num_cores()) + ] + self.l2buses = [ + L2XBar() for i in range(board.get_processor().get_num_cores()) + ] + self.l2caches = [ + L2Cache(size=self._l2_size) + for i in range(board.get_processor().get_num_cores()) + ] + # ITLB Page walk caches + self.iptw_caches = [ + MMUCache(size="8KiB") + for _ in range(board.get_processor().get_num_cores()) + ] + # DTLB Page walk caches + self.dptw_caches = [ + MMUCache(size="8KiB") + for _ in range(board.get_processor().get_num_cores()) + ] + + if board.has_coherent_io(): + self._setup_io_cache(board) + + for i, cpu in enumerate(board.get_processor().get_cores()): + + cpu.connect_icache(self.l1icaches[i].cpu_side) + cpu.connect_dcache(self.l1dcaches[i].cpu_side) + + self.l1icaches[i].mem_side = self.l2buses[i].cpu_side_ports + self.l1dcaches[i].mem_side = self.l2buses[i].cpu_side_ports + self.iptw_caches[i].mem_side = self.l2buses[i].cpu_side_ports + self.dptw_caches[i].mem_side = self.l2buses[i].cpu_side_ports + + self.l2buses[i].mem_side_ports = self.l2caches[i].cpu_side + + self.membus.cpu_side_ports = self.l2caches[i].mem_side + + cpu.connect_walker_ports( + self.iptw_caches[i].cpu_side, self.dptw_caches[i].cpu_side + ) + + if board.get_processor().get_isa() == ISA.X86: + int_req_port = self.membus.mem_side_ports + int_resp_port = self.membus.cpu_side_ports + cpu.connect_interrupt(int_req_port, int_resp_port) + else: + cpu.connect_interrupt() diff --git a/disaggregated_memory_setup/dm_caches_arm.py 
b/disaggregated_memory_setup/dm_caches_arm.py new file mode 100644 index 0000000000..d732f7c357 --- /dev/null +++ b/disaggregated_memory_setup/dm_caches_arm.py @@ -0,0 +1,122 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +from gem5.components.cachehierarchies.classic.private_l1_private_l2_cache_hierarchy import ( + PrivateL1PrivateL2CacheHierarchy, +) +from gem5.components.cachehierarchies.classic.caches.l1dcache import L1DCache +from gem5.components.cachehierarchies.classic.caches.l1icache import L1ICache +from gem5.components.cachehierarchies.classic.caches.l2cache import L2Cache +from gem5.components.cachehierarchies.classic.caches.mmu_cache import MMUCache +from gem5.components.boards.abstract_board import AbstractBoard +from gem5.isas import ISA +from m5.objects import Cache, L2XBar, BaseXBar, SystemXBar, BadAddr, Port + +from gem5.utils.override import overrides + + +class ClassicPL1PL2DMCacheArm(PrivateL1PrivateL2CacheHierarchy): + def __init__( + self, + l1d_size: str, + l1i_size: str, + l2_size: str, + ) -> None: + """ + :param l1d_size: The size of the L1 Data Cache (e.g., "32kB"). + :type l1d_size: str + :param l1i_size: The size of the L1 Instruction Cache (e.g., "32kB"). + :type l1i_size: str + :param l2_size: The size of the L2 Cache (e.g., "256kB"). + :type l2_size: str + :param membus: The memory bus. This is an optional parameter and + will default to a 64 bit width SystemXBar if not specified. + :type membus: BaseXBar + """ + super().__init__(l1i_size, l1d_size, l2_size) + + @overrides(PrivateL1PrivateL2CacheHierarchy) + def incorporate_cache(self, board: AbstractBoard) -> None: + + # Set up the system port for functional access from the simulator.
+ board.connect_system_port(self.membus.cpu_side_ports) + + for cntr in board.get_memory().get_memory_controllers(): + cntr.port = self.membus.mem_side_ports + + self.l1icaches = [ + L1ICache(size=self._l1i_size) + for i in range(board.get_processor().get_num_cores()) + ] + self.l1dcaches = [ + L1DCache(size=self._l1d_size) + for i in range(board.get_processor().get_num_cores()) + ] + self.l2buses = [ + L2XBar() for i in range(board.get_processor().get_num_cores()) + ] + self.l2caches = [ + L2Cache(size=self._l2_size) + for i in range(board.get_processor().get_num_cores()) + ] + # ITLB Page walk caches + self.iptw_caches = [ + MMUCache(size="8KiB") + for _ in range(board.get_processor().get_num_cores()) + ] + # DTLB Page walk caches + self.dptw_caches = [ + MMUCache(size="8KiB") + for _ in range(board.get_processor().get_num_cores()) + ] + + if board.has_coherent_io(): + self._setup_io_cache(board) + + for i, cpu in enumerate(board.get_processor().get_cores()): + + cpu.connect_icache(self.l1icaches[i].cpu_side) + cpu.connect_dcache(self.l1dcaches[i].cpu_side) + + self.l1icaches[i].mem_side = self.l2buses[i].cpu_side_ports + self.l1dcaches[i].mem_side = self.l2buses[i].cpu_side_ports + self.iptw_caches[i].mem_side = self.l2buses[i].cpu_side_ports + self.dptw_caches[i].mem_side = self.l2buses[i].cpu_side_ports + + self.l2buses[i].mem_side_ports = self.l2caches[i].cpu_side + + self.membus.cpu_side_ports = self.l2caches[i].mem_side + + cpu.connect_walker_ports( + self.iptw_caches[i].cpu_side, self.dptw_caches[i].cpu_side + ) + + if board.get_processor().get_isa() == ISA.X86: + int_req_port = self.membus.mem_side_ports + int_resp_port = self.membus.cpu_side_ports + cpu.connect_interrupt(int_req_port, int_resp_port) + else: + cpu.connect_interrupt() diff --git a/disaggregated_memory_setup/mi_example_dm_caches.py b/disaggregated_memory_setup/mi_example_dm_caches.py new file mode 100644 index 0000000000..d07e058dee --- /dev/null +++ 
b/disaggregated_memory_setup/mi_example_dm_caches.py @@ -0,0 +1,191 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+from gem5.components.cachehierarchies.ruby.mi_example_cache_hierarchy import (
+    MIExampleCacheHierarchy,
+)
+from gem5.components.cachehierarchies.ruby.caches.mi_example.l1_cache import (
+    L1Cache,
+)
+from gem5.components.cachehierarchies.ruby.caches.mi_example.dma_controller import (
+    DMAController,
+)
+from gem5.components.cachehierarchies.ruby.caches.mi_example.directory import (
+    Directory,
+)
+from gem5.components.cachehierarchies.ruby.topologies.simple_pt2pt import (
+    SimplePt2Pt,
+)
+
+# from gem5.components.cachehierarchies.ruby.abstract_ruby_cache_hierarchy import AbstractRubyCacheHierarchy
+from gem5.components.cachehierarchies.abstract_cache_hierarchy import (
+    AbstractCacheHierarchy,
+)
+from gem5.components.boards.abstract_board import AbstractBoard
+from gem5.coherence_protocol import CoherenceProtocol
+from gem5.isas import ISA
+from gem5.utils.override import overrides
+from gem5.utils.requires import requires
+from m5.objects import RubySystem, RubySequencer, DMASequencer, RubyPortProxy
+
+# from gem5.components.cachehierarchies.classic.caches.l1dcache import L1DCache
+# from gem5.components.cachehierarchies.classic.caches.l1icache import L1ICache
+# from gem5.components.cachehierarchies.classic.caches.l2cache import L2Cache
+# from gem5.components.cachehierarchies.classic.caches.mmu_cache import MMUCache
+# from gem5.components.boards.abstract_board import AbstractBoard
+# from gem5.isas import ISA
+# from m5.objects import Cache, L2XBar, BaseXBar, SystemXBar, BadAddr, Port
+
+# from gem5.utils.override import overrides
+
+
+class MIExampleDMCache(MIExampleCacheHierarchy):
+    def __init__(self, size: str, assoc: str):
+        """
+        :param size: The size of each cache in the hierarchy.
+        :param assoc: The associativity of each cache.
+        """
+        super().__init__(size, assoc)
+
+    @overrides(MIExampleCacheHierarchy)
+    def incorporate_cache(self, board: AbstractBoard) -> None:
+
+        requires(coherence_protocol_required=CoherenceProtocol.MI_EXAMPLE)
+
+        self.ruby_system = RubySystem()
+
+        # Ruby's global network.
+        self.ruby_system.network = SimplePt2Pt(self.ruby_system)
+
+        # MI Example uses 5 virtual networks.
+        self.ruby_system.number_of_virtual_networks = 5
+        self.ruby_system.network.number_of_virtual_networks = 5
+
+        # There is a single global list of all of the controllers to make it
+        # easier to connect everything to the global network. This can be
+        # customized depending on the topology/network requirements.
+        # Create one controller for each L1 cache (and the cache mem obj.)
+        # Create one directory controller per local and remote memory port.
+        self._controllers = []
+        for i, core in enumerate(board.get_processor().get_cores()):
+            cache = L1Cache(
+                size=self._size,
+                assoc=self._assoc,
+                network=self.ruby_system.network,
+                core=core,
+                cache_line_size=board.get_cache_line_size(),
+                target_isa=board.get_processor().get_isa(),
+                clk_domain=board.get_clock_domain(),
+            )
+
+            cache.sequencer = RubySequencer(
+                version=i,
+                dcache=cache.cacheMemory,
+                clk_domain=cache.clk_domain,
+            )
+
+            if board.has_io_bus():
+                cache.sequencer.connectIOPorts(board.get_io_bus())
+
+            cache.ruby_system = self.ruby_system
+
+            core.connect_icache(cache.sequencer.in_ports)
+            core.connect_dcache(cache.sequencer.in_ports)
+
+            core.connect_walker_ports(
+                cache.sequencer.in_ports, cache.sequencer.in_ports
+            )
+
+            # Connect the interrupt ports (only X86 passes explicit ports).
+            if board.get_processor().get_isa() == ISA.X86:
+                int_req_port = cache.sequencer.interrupt_out_port
+                int_resp_port = cache.sequencer.in_ports
+                core.connect_interrupt(int_req_port, int_resp_port)
+            else:
+                core.connect_interrupt()
+
+            cache.ruby_system = self.ruby_system
+            self._controllers.append(cache)
+
+        # Create the directory controllers: local memory first, then remote.
+        self._directory_controllers = []
+        for range, port in board.get_mem_ports():
+            dir = Directory(
+                self.ruby_system.network,
+                board.get_cache_line_size(),
+                range,
+                port,
+            )
+            dir.ruby_system = self.ruby_system
+            self._directory_controllers.append(dir)
+
+        for range, port in board.get_remote_mem_ports():
+            dir = Directory(
+                self.ruby_system.network,
+                board.get_cache_line_size(),
+                range,
+                port,
+            )
+            dir.ruby_system = self.ruby_system
+            self._directory_controllers.append(dir)
+
+        # Create the DMA Controllers, if required.
+        self._dma_controllers = []
+        if board.has_dma_ports():
+            dma_ports = board.get_dma_ports()
+            for i, port in enumerate(dma_ports):
+                ctrl = DMAController(
+                    self.ruby_system.network, board.get_cache_line_size()
+                )
+                ctrl.dma_sequencer = DMASequencer(version=i, in_ports=port)
+
+                ctrl.ruby_system = self.ruby_system
+                ctrl.dma_sequencer.ruby_system = self.ruby_system
+
+                self._dma_controllers.append(ctrl)
+
+        self.ruby_system.num_of_sequencers = len(self._controllers) + len(
+            self._dma_controllers
+        )
+
+        # Connect the controllers.
+        self.ruby_system.controllers = self._controllers
+        self.ruby_system.directory_controllers = self._directory_controllers
+
+        if len(self._dma_controllers) != 0:
+            self.ruby_system.dma_controllers = self._dma_controllers
+
+        self.ruby_system.network.connectControllers(
+            self._controllers
+            + self._directory_controllers
+            + self._dma_controllers
+        )
+        self.ruby_system.network.setup_buffers()
+
+        # Set up a proxy port for the system_port. Used to load binaries and
+        # for other functional-only accesses.
+        self.ruby_system.sys_port_proxy = RubyPortProxy()
+        board.connect_system_port(self.ruby_system.sys_port_proxy.in_ports)
diff --git a/disaggregated_memory_setup/numa_config.py b/disaggregated_memory_setup/numa_config.py
new file mode 100644
index 0000000000..2436a7c735
--- /dev/null
+++ b/disaggregated_memory_setup/numa_config.py
@@ -0,0 +1,126 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +This script shows an example of running a full system RISCV Ubuntu boot +simulation using the gem5 library. This simulation boots Ubuntu 20.04 using +2 TIMING CPU cores. The simulation ends when the startup is completed +successfully. 
+ +Usage +----- + +``` +scons build/RISCV/gem5.opt +./build/RISCV/gem5.opt \ + configs/example/gem5_library/riscv-ubuntu-run.py +``` +""" + +import m5 +from m5.objects import Root + +from gem5.utils.requires import requires +from riscv_dm_board import RiscvDMBoard +from dm_caches import ClassicPL1PL2DMCache +from gem5.components.memory import DualChannelDDR4_2400 +from gem5.components.memory.multi_channel import * +from gem5.components.processors.simple_processor import SimpleProcessor +from gem5.components.processors.cpu_types import CPUTypes +from gem5.isas import ISA +from gem5.simulate.simulator import Simulator +from gem5.resources.workload import Workload +from gem5.resources.workload import * +from gem5.resources.resource import * + +# This runs a check to ensure the gem5 binary is compiled for RISCV. + +requires(isa_required=ISA.RISCV) + +# Here we setup the parameters of the l1 and l2 caches. + +cache_hierarchy = ClassicPL1PL2DMCache( + l1d_size="2MB", l1i_size="2MB", l2_size="4MB" +) + +# Memory: Dual Channel DDR4 2400 DRAM device. + +local_memory = DualChannelDDR4_2400(size="64MiB") +remote_memory = DualChannelDDR4_2400(size="64MiB") + +# remote_memory = DualChannelHBM_1000(size="4GB") + +# Here we setup the processor. We use a simple processor. +processor = SimpleProcessor( + cpu_type=CPUTypes.ATOMIC, isa=ISA.RISCV, num_cores=1 +) + +# Here we setup the board. The RiscvBoard allows for Full-System RISCV +# simulations. 
+board = RiscvDMBoard( + clk_freq="3GHz", + processor=processor, + local_memory=local_memory, + remote_memory=remote_memory, + cache_hierarchy=cache_hierarchy, +) + +cmd = [ + "mount -t sysfs - /sys;", + "mount -t proc - /proc;", + "bin/bash;" + # "m5 exit;" +] + +workload = CustomWorkload( + function="set_kernel_disk_workload", + parameters={ + "disk_image": DiskImageResource( + # local_path=os.path.join( + # os.getcwd(), "/home/kaustavg/ubuntu-numa.img" + # ), + local_path="/home/kaustavg/disk-images/rv64gc-hpc-2204.img", + root_partition="1", + ), + "kernel": CustomResource( + "/scr/kaustavg/simulators-at-scratch/DArchR/WorkingDir/SST13/kernels/gem5-resources/src/riscv-boot-exit-nodisk/riscv-pk/build/bbl" + # os.path.join(os.getcwd(), "/home/kaustavg/bbl") + ), + "readfile_contents": " ".join(cmd), + }, +) +print("______", " ".join(cmd)) +# Here we a full system workload: "riscv-ubuntu-20.04-boot" which boots +# Ubuntu 20.04. Once the system successfully boots it encounters an `m5_exit` +# instruction which stops the simulation. When the simulation has ended you may +# inspect `m5out/system.pc.com_1.device` to see the stdout. +# board.set_workload(Workload("riscv-ubuntu-20.04-boot")) + +# This disk image has NUMA tools installed. +board.set_workload(workload) +simulator = Simulator(board=board) +simulator.run() +simulator.run() diff --git a/disaggregated_memory_setup/numa_config_sst.py b/disaggregated_memory_setup/numa_config_sst.py new file mode 100644 index 0000000000..42ce794ae6 --- /dev/null +++ b/disaggregated_memory_setup/numa_config_sst.py @@ -0,0 +1,131 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +This script shows an example of running a full system RISCV Ubuntu boot +simulation using the gem5 library. This simulation boots Ubuntu 20.04 using +2 TIMING CPU cores. The simulation ends when the startup is completed +successfully. 
+ +Usage +----- + +``` +scons build/RISCV/gem5.opt +./build/RISCV/gem5.opt \ + configs/example/gem5_library/riscv-ubuntu-run.py +``` +""" + +import m5 +from m5.objects import Root, NoncoherentXBar + +from gem5.utils.requires import requires +from riscv_dm_board import RiscvDMBoard +from riscv_sst_board import RiscvSstBoard +from dm_caches import ClassicPL1PL2DMCache +from gem5.components.memory import SingleChannelDDR4_2400, DualChannelDDR4_2400 +from gem5.components.memory.multi_channel import * +from gem5.components.processors.simple_processor import SimpleProcessor +from gem5.components.processors.cpu_types import CPUTypes +from gem5.isas import ISA +from gem5.simulate.simulator import Simulator +from gem5.resources.workload import Workload +from gem5.resources.workload import * +from gem5.resources.resource import * + +# This runs a check to ensure the gem5 binary is compiled for RISCV. + +requires(isa_required=ISA.RISCV) + +# Here we setup the parameters of the l1 and l2 caches. + +cache_hierarchy = ClassicPL1PL2DMCache( + l1d_size="4MB", l1i_size="4MB", l2_size="32MB" +) + +# Memory: Dual Channel DDR4 2400 DRAM device. + +local_memory = SingleChannelDDR4_2400(size="2GiB") +# This has to be an argument coming from SST's side. +remote_memory_size = "2GiB" +# remote_memory = DualChannelDDR4_2400(size="4GB") + +# remote_memory = DualChannelHBM_1000(size="4GB") + +# Here we setup the processor. We use a simple processor. +processor = SimpleProcessor( + cpu_type=CPUTypes.TIMING, isa=ISA.RISCV, num_cores=1 +) + +# Here we setup the board. The RiscvBoard allows for Full-System RISCV +# simulations. 
+board = RiscvSstBoard( + clk_freq="3GHz", + processor=processor, + local_memory=local_memory, + remote_memory_size=remote_memory_size, + cache_hierarchy=cache_hierarchy, +) + +workload = CustomWorkload( + function="set_kernel_disk_workload", + parameters={ + "disk_image": DiskImageResource( + # CustomDiskImageResource( + local_path=os.path.join( + os.getcwd(), "/home/kaustavg/ubuntu-numa.img" + ), + root_partition="1", + ), + "kernel": CustomResource( + os.path.join(os.getcwd(), "/home/kaustavg/bbl") + ), + }, +) + +# board.cache_hierarchy.membus = NoncoherentXBar( +# frontend_latency=0, +# forward_latency=0, +# response_latency=0, +# header_latency=0, +# width=64, +# ) + +# Here we a full system workload: "riscv-ubuntu-20.04-boot" which boots +# Ubuntu 20.04. Once the system successfully boots it encounters an `m5_exit` +# instruction which stops the simulation. When the simulation has ended you may +# inspect `m5out/system.pc.com_1.device` to see the stdout. +# board.set_workload(Workload("riscv-ubuntu-20.04-boot")) + +# This disk image has NUMA tools installed. +board.set_workload(workload) +board._pre_instantiate() +root = Root(full_system=True, system=board) + +# simulator = Simulator(board=board) +# simulator.run() +# simulator.run() diff --git a/disaggregated_memory_setup/numa_config_sst_nodes.py b/disaggregated_memory_setup/numa_config_sst_nodes.py new file mode 100644 index 0000000000..f3d6ef83cc --- /dev/null +++ b/disaggregated_memory_setup/numa_config_sst_nodes.py @@ -0,0 +1,172 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +This script shows an example of running a full system RISCV Ubuntu boot +simulation using the gem5 library. This simulation boots Ubuntu 20.04 using +2 TIMING CPU cores. The simulation ends when the startup is completed +successfully. 
+ +Usage +----- + +``` +scons build/RISCV/gem5.opt +./build/RISCV/gem5.opt \ + configs/example/gem5_library/riscv-ubuntu-run.py +``` +""" + +import m5 +import argparse +from m5.objects import Root, NoncoherentXBar + +from gem5.utils.requires import requires +from riscv_dm_board import RiscvDMBoard +from riscv_sst_board import RiscvSstBoard +from dm_caches import ClassicPL1PL2DMCache +from gem5.components.memory import SingleChannelDDR4_2400, DualChannelDDR4_2400 +from gem5.components.memory.multi_channel import * +from gem5.components.processors.simple_processor import SimpleProcessor +from gem5.components.processors.cpu_types import CPUTypes +from gem5.isas import ISA +from gem5.simulate.simulator import Simulator +from gem5.resources.workload import Workload +from gem5.resources.workload import * +from gem5.resources.resource import * + +# This runs a check to ensure the gem5 binary is compiled for RISCV. + +requires(isa_required=ISA.RISCV) + +parser = argparse.ArgumentParser( + description="An example configuration script to run multiple gem5 nodes." +) + +parser.add_argument( + "--cpu-clock-rate", + type=str, + required=True, + help="CPU clock rate. e.g. 3GHz etc", +) + +parser.add_argument( + "--instance", + type=int, + required=True, + help="Gem5 node instance", +) +args = parser.parse_args() +# Here we setup the parameters of the l1 and l2 caches. + +cache_hierarchy = ClassicPL1PL2DMCache( + l1d_size="128KiB", l1i_size="128KiB", l2_size="256KiB" +) + +# Memory: Dual Channel DDR4 2400 DRAM device. +# local_memory_size = str(2 * (int(args.instance) + 1)) + "GiB" +local_memory_size = "2GiB" +local_memory = SingleChannelDDR4_2400(size=local_memory_size) +# This has to be an argument coming from SST's side. +remote_memory_size = "2GiB" +print(local_memory_size) + +# remote_memory = DualChannelDDR4_2400(size="4GB") + +# remote_memory = DualChannelHBM_1000(size="4GB") + +# Here we setup the processor. We use a simple processor. 
+processor = SimpleProcessor(cpu_type=CPUTypes.O3, isa=ISA.RISCV, num_cores=1) +# processor = SimpleSwitchableProcessor( +# first_cpu_type=CPUTypes.O3, +# isa=ISA.RISCV, num_cores=1 +# ) +# Here we setup the board. The RiscvBoard allows for Full-System RISCV +# simulations. +board = RiscvSstBoard( + clk_freq=args.cpu_clock_rate, + processor=processor, + local_memory=local_memory, + remote_memory_size=remote_memory_size, + cache_hierarchy=cache_hierarchy, + instance=args.instance, +) + +cmd = [ + "mount -t sysfs - /sys;", + "mount -t proc - /proc;", + "numastat;", + "m5 dumpresetstats 0 ;", + "numactl --cpubind=0 --membind=0 -- /home/ubuntu/simple-vectorizable-microbenchmarks/stream-annotated/stream.hw 1000000;", + "m5 dumpresetstats 0;", + "numactl --cpubind=0 --membind=0,1 -- /home/ubuntu/simple-vectorizable-microbenchmarks/stream-annotated/stream.hw 1000000;", + "m5 dumpresetstats 0;", + "numactl --cpubind=0 --membind=1 -- /home/ubuntu/simple-vectorizable-microbenchmarks/stream-annotated/stream.hw 1000000;", + "m5 dumpresetstats 0;", + "m5 exit;", +] + +workload = CustomWorkload( + function="set_kernel_disk_workload", + parameters={ + "disk_image": DiskImageResource( + # CustomDiskImageResource( + # local_path=os.path.join( + # os.getcwd(), "/home/kaustavg/ubuntu-numa.img" + # "/home/kaustavg/disk-images/rv64gc-hpc-2204.img" + # ), + local_path="/home/kaustavg/disk-images/rv64gc-hpc-2204.img", + root_partition="1", + ), + "kernel": CustomResource( + os.path.join(os.getcwd(), "/home/kaustavg/bbl") + ), + "readfile_contents": " ".join(cmd), + }, +) + +# board.cache_hierarchy.membus = NoncoherentXBar( +# frontend_latency=0, +# forward_latency=0, +# response_latency=0, +# header_latency=0, +# width=64, +# ) + +# Here we a full system workload: "riscv-ubuntu-20.04-boot" which boots +# Ubuntu 20.04. Once the system successfully boots it encounters an `m5_exit` +# instruction which stops the simulation. 
When the simulation has ended you may +# inspect `m5out/system.pc.com_1.device` to see the stdout. +# board.set_workload(Workload("riscv-ubuntu-20.04-boot")) + +# This disk image has NUMA tools installed. +board.set_workload(workload) +board._pre_instantiate() +root = Root(full_system=True, system=board) +m5.instantiate() +# simulator = Simulator(board=board) +# simulator.run() +# simulator.run() diff --git a/disaggregated_memory_setup/numa_config_sst_nodes_arm.py b/disaggregated_memory_setup/numa_config_sst_nodes_arm.py new file mode 100644 index 0000000000..8a255e6c3f --- /dev/null +++ b/disaggregated_memory_setup/numa_config_sst_nodes_arm.py @@ -0,0 +1,139 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import m5 +from m5.objects import Root, OutgoingRequestBridge, AddrRange + +from gem5.utils.requires import requires +from arm_dm_sst_board import ArmDMSSTBoard +from dm_caches_arm import ClassicPL1PL2DMCacheArm +from gem5.components.memory import DualChannelDDR4_2400 +from gem5.components.memory.multi_channel import * +from gem5.components.processors.simple_processor import SimpleProcessor +from gem5.components.processors.cpu_types import CPUTypes +from gem5.isas import ISA +from gem5.simulate.simulator import Simulator +from gem5.resources.workload import Workload +from gem5.resources.workload import * +from gem5.resources.resource import * +from gem5.utils.override import overrides +import argparse + +parser = argparse.ArgumentParser() +parser.add_argument("--command", type=str, help="Command run by guest") +parser.add_argument( + "--cpu-type", + type=str, + choices=["atomic", "timing"], + default="atomic", + help="CPU type", +) +parser.add_argument( + "--cpu-clock-rate", + type=str, + required=True, + help="CPU Clock", +) +parser.add_argument( + "--remote-memory-range", + type=str, + # default="2GiB", + required=True, + help="Remote memory range", +) +parser.add_argument( + "--local-memory-range", + type=str, + # default="2GiB", + required=True, + help="Local memory range", +) +args = parser.parse_args() +command = args.command +remote_memory_range = list(map(int, args.remote_memory_range.split(","))) +remote_memory_range = 
AddrRange(remote_memory_range[0], remote_memory_range[1])
+print(remote_memory_range)
+requires(isa_required=ISA.ARM)
+
+cache_hierarchy = ClassicPL1PL2DMCacheArm(
+    l1d_size="128KiB", l1i_size="128KiB", l2_size="256KiB"
+)
+
+local_memory = DualChannelDDR4_2400(size=args.local_memory_range)
+
+cpu_type = {"atomic": CPUTypes.ATOMIC, "timing": CPUTypes.TIMING}[
+    args.cpu_type
+]
+processor = SimpleProcessor(cpu_type=cpu_type, isa=ISA.ARM, num_cores=1)
+
+
+class MyBoard(ArmDMSSTBoard):
+    @overrides(ArmDMSSTBoard)
+    def _pre_instantiate(self):
+        super()._pre_instantiate()
+        self.remote_memory_outgoing_bridge = OutgoingRequestBridge()
+        self.remote_memory_outgoing_bridge.physical_address_ranges = [
+            self.get_remote_memory_addr_range()
+        ]
+        self.remote_memory_outgoing_bridge.port = (
+            self.cache_hierarchy.membus.mem_side_ports
+        )
+
+    @overrides(ArmDMSSTBoard)
+    def get_default_kernel_args(self):
+        return [
+            "root=/dev/vda1",
+            "init=/root/gem5-init.sh",
+            "console=ttyAMA0",
+            "lpj=19988480",
+            "norandmaps",
+            "rw",
+            # f"mem={self.get_memory().get_size()}",
+        ]
+
+
+board = MyBoard(
+    clk_freq=args.cpu_clock_rate,
+    processor=processor,
+    memory=local_memory,
+    remote_memory_range=remote_memory_range,
+    cache_hierarchy=cache_hierarchy,
+)
+
+board.set_kernel_disk_workload(
+    kernel=CustomResource("/home/kaustavg/vmlinux-5.4.49-NUMA.arm64"),
+    bootloader=Resource("arm64-bootloader-foundation"),
+    disk_image=DiskImageResource(
+        "/projects/gem5/hn/DISK_IMAGES/arm64sve-hpc-2204-20230526-numa.img"
+    ),
+    readfile_contents=f"mount -t sysfs - /sys; mount -t proc - /proc; {command};",
+)
+
+board._pre_instantiate()
+root = Root(full_system=True, system=board)
+
+# NOTE(review): no Simulator is created here; the script stops after building
+# `root`, presumably so the SST side drives the simulation -- confirm.
diff --git a/disaggregated_memory_setup/numa_config_w_delay.py b/disaggregated_memory_setup/numa_config_w_delay.py
new file mode 100644
index 0000000000..1af6392f96
--- /dev/null
+++ b/disaggregated_memory_setup/numa_config_w_delay.py
@@ -0,0 +1,136
"""
Full-system RISC-V configuration with a local and a delayed remote memory.

The script builds a ``RiscvDMBoard`` with two TIMING cores, a local
dual-channel DDR4-2400 memory and a remote dual-channel DDR4-2400 memory
created through ``RemoteChanneledMemory`` with an extra latency offset. It
then sets a custom kernel/disk-image workload and runs the simulation.

Usage
-----

```
scons build/RISCV/gem5.opt
./build/RISCV/gem5.opt \
    disaggregated_memory_setup/numa_config_w_delay.py
```
"""

from typing import Optional

import m5
from m5.objects import Root

from gem5.utils.requires import requires
from riscv_dm_board import RiscvDMBoard
from dm_caches import ClassicPL1PL2DMCache
from gem5.components.memory import DualChannelDDR4_2400
from gem5.components.memory.multi_channel import *
from gem5.components.processors.simple_processor import SimpleProcessor
from gem5.components.processors.cpu_types import CPUTypes
from gem5.isas import ISA
from gem5.simulate.simulator import Simulator
from gem5.resources.workload import Workload
from gem5.resources.workload import *
from gem5.resources.resource import *

from gem5.components.memory.abstract_memory_system import AbstractMemorySystem
from gem5.components.memory.dram_interfaces.ddr4 import DDR4_2400_8x8
from remote_memory import RemoteChanneledMemory


def RemoteDualChannelDDR4_2400(
    size: Optional[str] = None, remote_offset_latency=300
) -> AbstractMemorySystem:
    """Return a dual-channel DDR4_2400_8x8 memory with added latency.

    :param size: Memory size string forwarded to ``RemoteChanneledMemory``.
    :param remote_offset_latency: Extra latency forwarded to
        ``RemoteChanneledMemory`` (default 300).
    :returns: A ``RemoteChanneledMemory`` with 2 channels and an
        interleaving size of 64.
    """
    return RemoteChanneledMemory(
        DDR4_2400_8x8,
        2,
        64,
        size=size,
        remote_offset_latency=remote_offset_latency,
    )


# This runs a check to ensure the gem5 binary is compiled for RISCV.
requires(isa_required=ISA.RISCV)

# With RISCV, we use simple caches.
cache_hierarchy = ClassicPL1PL2DMCache(
    l1d_size="16kB", l1i_size="16kB", l2_size="256kB"
)

# Local memory: Dual Channel DDR4 2400 DRAM device with no delay.
# Remote memory: Dual Channel DDR4 2400 DRAM device with 750 clocks (250 ns).
# 250 ns is taken from the TPP paper.
local_memory = DualChannelDDR4_2400(size="512MB")
remote_memory = RemoteDualChannelDDR4_2400(
    size="2GB", remote_offset_latency=750
)

# Here we setup the processor. We use a simple processor.
processor = SimpleProcessor(
    cpu_type=CPUTypes.TIMING, isa=ISA.RISCV, num_cores=2
)

# Here we setup the board. The RiscvDMBoard takes a local and a remote
# memory system.
board = RiscvDMBoard(
    clk_freq="3GHz",
    processor=processor,
    local_memory=local_memory,
    remote_memory=remote_memory,
    cache_hierarchy=cache_hierarchy,
)

# Both paths are absolute, so they are passed directly: joining an absolute
# path onto os.getcwd() returns the absolute path unchanged.
workload = CustomWorkload(
    function="set_kernel_disk_workload",
    parameters={
        "disk_image": CustomDiskImageResource(
            local_path="/home/kaustavg/disk-images/rv64gc-hpc-2204.img",
            disk_root_partition="1",
        ),
        "kernel": CustomResource("/home/kaustavg/bbl"),
    },
)

# Run the custom kernel + disk-image workload defined above.
board.set_workload(workload)
simulator = Simulator(board=board)
# NOTE(review): run() is called twice, presumably so the simulation resumes
# after the first exit event -- confirm this is intended.
simulator.run()
simulator.run()
"""
Full-system RISC-V configuration using the ``MIExampleDMCache`` hierarchy.

The script builds a ``RiscvDMBoard`` with four TIMING cores, a 512MB local
and a 2GB remote dual-channel DDR4-2400 memory, sets a custom kernel and
disk-image workload and runs the simulation.

Usage
-----

```
scons build/RISCV/gem5.opt
./build/RISCV/gem5.opt \
    disaggregated_memory_setup/numa_config_w_ruby_caches.py
```
"""

import m5
from m5.objects import Root

from gem5.utils.requires import requires
from riscv_dm_board import RiscvDMBoard
from dm_caches import ClassicPL1PL2DMCache
from mi_example_dm_caches import MIExampleDMCache
from gem5.components.memory import DualChannelDDR4_2400
from gem5.components.memory.multi_channel import *
from gem5.components.processors.simple_processor import SimpleProcessor
from gem5.components.processors.cpu_types import CPUTypes
from gem5.isas import ISA
from gem5.simulate.simulator import Simulator
from gem5.resources.workload import Workload
from gem5.resources.workload import *
from gem5.resources.resource import *

# This runs a check to ensure the gem5 binary is compiled for RISCV.
requires(isa_required=ISA.RISCV)

# Cache hierarchy from the local mi_example_dm_caches module.
# NOTE(review): RiscvDMBoard's own docstring says it only works with classic
# caches; presumably MIExampleDMCache is Ruby-based -- confirm the pairing.
cache_hierarchy = MIExampleDMCache(size="256kB", assoc=8)

# Memory: Dual Channel DDR4 2400 DRAM devices for both the local and the
# remote side.
local_memory = DualChannelDDR4_2400(size="512MB")
remote_memory = DualChannelDDR4_2400(size="2GB")

# Here we setup the processor. We use a simple processor.
processor = SimpleProcessor(
    cpu_type=CPUTypes.TIMING, isa=ISA.RISCV, num_cores=4
)

# Here we setup the board. The RiscvDMBoard takes a local and a remote
# memory system.
board = RiscvDMBoard(
    clk_freq="3GHz",
    processor=processor,
    local_memory=local_memory,
    remote_memory=remote_memory,
    cache_hierarchy=cache_hierarchy,
)

# Both paths are absolute, so they are passed directly: joining an absolute
# path onto os.getcwd() returns the absolute path unchanged.
workload = CustomWorkload(
    function="set_kernel_disk_workload",
    parameters={
        "disk_image": CustomDiskImageResource(
            local_path="/home/kaustavg/disk-images/rv64gc-hpc-2204.img",
            disk_root_partition="1",
        ),
        "kernel": CustomResource(
            "/scr/kaustavg/simulators-at-scratch/DArchR/WorkingDir/gem5-simpoint/gem5x/gem5/bbl"
        ),
    },
)

# Run the custom kernel + disk-image workload defined above.
board.set_workload(workload)
simulator = Simulator(board=board)
# NOTE(review): run() is called twice, presumably so the simulation resumes
# after the first exit event -- confirm this is intended.
simulator.run()
simulator.run()
"""
Full-system X86 configuration for the disaggregated-memory setup.

The script builds an ``X86DMBoard`` with one TIMING core, the
``ClassicPL1PL2DMCache`` hierarchy, 1GB of local single-channel DDR4-2400
memory and a 1GiB remote memory size. It sets a kernel + disk-image workload
and creates the ``Root`` object; it does not instantiate or run the
simulation itself.

Usage:
------

```
scons build/X86/gem5.opt
./build/X86/gem5.opt \
    disaggregated_memory_setup/numa_config_x86.py
```
"""

import argparse
import time
import sys

import m5
from m5.objects import Root

from gem5.utils.requires import requires
from gem5.components.boards.x86_board import X86Board
from gem5.components.memory import DualChannelDDR4_2400, SingleChannelDDR4_2400

from x86_dm_board import X86DMBoard
from gem5.components.processors.simple_processor import SimpleProcessor
from gem5.components.processors.cpu_types import CPUTypes
from gem5.isas import ISA
from gem5.coherence_protocol import CoherenceProtocol
from gem5.resources.resource import obtain_resource
from gem5.simulate.simulator import Simulator
from gem5.simulate.exit_event import ExitEvent
from dm_caches import ClassicPL1PL2DMCache

# Only the X86 ISA is required. No KVM core is created below (the processor
# is a plain TIMING SimpleProcessor), so KVM support is not demanded.
requires(isa_required=ISA.X86)

cache_hierarchy = ClassicPL1PL2DMCache(
    l1d_size="2MB", l1i_size="2MB", l2_size="4MB"
)

# Local memory is a Single Channel DDR4 2400 DRAM device; the remote memory
# is described to the board by its size only.
local_memory = SingleChannelDDR4_2400(size="1GB")
remote_mem_size = "1GiB"

# Here we setup the processor: a single TIMING core.
processor = SimpleProcessor(cpu_type=CPUTypes.TIMING, isa=ISA.X86, num_cores=1)

# Here we setup the board. The X86DMBoard takes the local memory system and
# the size of the remote memory.
board = X86DMBoard(
    clk_freq="3GHz",
    processor=processor,
    memory=local_memory,
    remote_memory_size=remote_mem_size,
    cache_hierarchy=cache_hierarchy,
)

# Here we set the FS workload. Both resources are fetched with
# obtain_resource().
board.set_kernel_disk_workload(
    kernel=obtain_resource("x86-linux-kernel-4.19.83"),
    disk_image=obtain_resource("x86-ubuntu-18.04-img"),
)

# The board is prepared and wrapped in a Root here; nothing in this script
# instantiates or runs it.
# NOTE(review): presumably the SST gem5 component does that -- confirm.
board._pre_instantiate()
root = Root(full_system=True, system=board)
""" Channeled "generic" DDR memory controllers
"""

import m5
from gem5.utils.override import overrides
from m5.objects import AddrRange, DRAMInterface, MemCtrl, Port
from m5.objects.XBar import NoncoherentXBar
from typing import Type, Sequence, Tuple, Optional, Union

from gem5.components.memory.memory import ChanneledMemory


class RemoteChanneledMemory(ChanneledMemory):
    """ChanneledMemory with a NoncoherentXBar in front of every channel.

    Each channel's memory controller is reached through its own crossbar
    whose frontend latency is ``remote_offset_latency``. The ports exposed
    to the rest of the system are the crossbars' CPU-side ports.
    """

    def __init__(
        self,
        dram_interface_class: Type[DRAMInterface],
        num_channels: Union[int, str],
        interleaving_size: Union[int, str],
        size: Optional[str] = None,
        addr_mapping: Optional[str] = None,
        remote_offset_latency: Union[int, str] = 0,
    ) -> None:
        """Store the latency offset, then defer to ChanneledMemory.

        :param dram_interface_class: Forwarded to ChanneledMemory.
        :param num_channels: Forwarded to ChanneledMemory.
        :param interleaving_size: Forwarded to ChanneledMemory.
        :param size: Forwarded to ChanneledMemory.
        :param addr_mapping: Forwarded to ChanneledMemory.
        :param remote_offset_latency: Frontend latency given to each
            per-channel crossbar.
        """
        # Stored before the parent constructor runs, so the value is already
        # available if that constructor builds the controllers.
        self._remote_latency = remote_offset_latency
        super().__init__(
            dram_interface_class,
            num_channels,
            interleaving_size,
            size,
            addr_mapping,
        )

    @overrides(ChanneledMemory)
    def _create_mem_interfaces_controller(self):
        """Create the DRAM interfaces, crossbars and memory controllers."""
        channels = range(self._num_channels)

        dram_interfaces = []
        for _ in channels:
            dram_interfaces.append(
                self._dram_class(addr_mapping=self._addr_mapping)
            )
        self._dram = dram_interfaces

        links = []
        for _ in channels:
            link = NoncoherentXBar(
                frontend_latency=self._remote_latency,
                forward_latency=0,
                response_latency=0,
                width=8,
            )
            links.append(link)
        self.remote_links = links

        # Each controller hangs off the memory side of its channel's link.
        controllers = []
        for dram, link in zip(self._dram, self.remote_links):
            controllers.append(MemCtrl(dram=dram, port=link.mem_side_ports))
        self.mem_ctrl = controllers

    @overrides(ChanneledMemory)
    def get_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
        """Pair each channel's DRAM range with its link's CPU-side ports."""
        ports = []
        for ctrl, link in zip(self.mem_ctrl, self.remote_links):
            ports.append((ctrl.dram.range, link.cpu_side_ports))
        return ports

    @overrides(ChanneledMemory)
    def get_memory_controllers(self):
        """Return the CPU-side ports of the per-channel links."""
        link_ports = []
        for link in self.remote_links:
            link_ports.append(link.cpu_side_ports)
        return link_ports
import os

from typing import List, Optional, Sequence, Tuple

from gem5.components.boards.riscv_board import RiscvBoard

from gem5.components.boards.abstract_board import AbstractBoard
from gem5.utils.override import overrides
from gem5.resources.resource import AbstractResource
from gem5.components.boards.kernel_disk_workload import KernelDiskWorkload
from gem5.components.boards.abstract_system_board import AbstractSystemBoard
from gem5.components.processors.abstract_processor import AbstractProcessor
from gem5.components.memory.abstract_memory_system import AbstractMemorySystem
from gem5.components.cachehierarchies.abstract_cache_hierarchy import (
    AbstractCacheHierarchy,
)

from gem5.isas import ISA

import m5

from m5.objects import AddrRange, HiFive, Frequency, Port

from m5.util.fdthelper import (
    Fdt,
    FdtNode,
    FdtProperty,
    FdtPropertyStrings,
    FdtPropertyWords,
    FdtState,
)


class RiscvDMBoard(RiscvBoard):
    """
    A board capable of full system simulation for RISC-V
    At a high-level, this is based on the HiFive Unmatched board from SiFive.
    This board assumes that you will be booting Linux.

    The board owns two memory systems: a local one starting at 0x80000000
    and a remote one placed directly after it. Each is described as its own
    NUMA node in the generated device tree.

    **Limitations**
    * Only works with classic caches
    """

    def __init__(
        self,
        clk_freq: str,
        processor: AbstractProcessor,
        local_memory: AbstractMemorySystem,
        remote_memory: AbstractMemorySystem,
        cache_hierarchy: AbstractCacheHierarchy,
    ) -> None:
        """Create the board.

        :param clk_freq: Clock frequency forwarded to RiscvBoard.
        :param processor: Processor; its ISA must be RISCV.
        :param local_memory: Memory system mapped at 0x80000000.
        :param remote_memory: Memory system mapped after the local memory.
        :param cache_hierarchy: Cache hierarchy forwarded to RiscvBoard.
        :raises Exception: If the processor does not use the RISCV ISA.
        """
        # Set before the parent constructor runs so the overridden hooks
        # below can reach both memories through the getters.
        self._localMemory = local_memory
        self._remoteMemory = remote_memory
        super().__init__(
            clk_freq=clk_freq,
            processor=processor,
            memory=local_memory,
            cache_hierarchy=cache_hierarchy,
        )
        self.local_memory = local_memory
        self.remote_memory = remote_memory

        if processor.get_isa() != ISA.RISCV:
            raise Exception(
                "The RISCVBoard requires a processor using the "
                "RISCV ISA. Current processor ISA: "
                f"'{processor.get_isa().name}'."
            )

    @overrides(AbstractSystemBoard)
    def get_memory(self) -> AbstractMemorySystem:
        """Not supported on this board.

        Use get_local_memory() or get_remote_memory() instead.

        :raises NotImplementedError: Always.
        """
        raise NotImplementedError

    def get_local_memory(self) -> AbstractMemorySystem:
        """Get the local memory (RAM) connected to the board.

        :returns: The local memory system.
        """
        return self._localMemory

    def get_remote_memory(self) -> AbstractMemorySystem:
        """Get the remote memory (RAM) connected to the board.

        :returns: The remote memory system.
        """
        return self._remoteMemory

    @overrides(AbstractSystemBoard)
    def get_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
        """Return the (range, port) pairs of the local memory."""
        return self.get_local_memory().get_mem_ports()

    def get_remote_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
        """Return the (range, port) pairs of the remote memory."""
        return self.get_remote_memory().get_mem_ports()

    @overrides(AbstractSystemBoard)
    def _setup_memory_ranges(self):
        """Assign address ranges to the local and the remote memory."""

        # the memory has to be setup for both the memory ranges. there is one
        # local memory range, close to the host machine and the other range is
        # pure memory, far from the host.
        local_memory = self.get_local_memory()
        remote_memory = self.get_remote_memory()

        local_mem_size = local_memory.get_size()
        remote_mem_size = remote_memory.get_size()

        self._local_mem_ranges = [
            AddrRange(start=0x80000000, size=local_mem_size)
        ]

        # The remote memory starts where the local memory ends. Therefore it
        # has to be offset by the local memory's size.
        self._remote_mem_ranges = [
            AddrRange(start=0x80000000 + local_mem_size, size=remote_mem_size)
        ]

        # using a _global_ memory range to keep a track of all the memory
        # ranges. This is used to generate the dtb for this machine
        self._global_mem_ranges = []
        self._global_mem_ranges.append(self._local_mem_ranges[0])
        self._global_mem_ranges.append(self._remote_mem_ranges[0])

        # setting the memory ranges for both of the memory ranges.
        local_memory.set_memory_range(self._local_mem_ranges)
        remote_memory.set_memory_range(self._remote_mem_ranges)

    @overrides(RiscvBoard)
    def generate_device_tree(self, outdir: str) -> None:
        """Creates the dtb and dts files.

        Creates two files in the outdir: 'device.dtb' and 'device.dts'

        :param outdir: Directory to output the files
        """

        state = FdtState(addr_cells=2, size_cells=2, cpu_cells=1)
        root = FdtNode("/")
        root.append(state.addrCellsProperty())
        root.append(state.sizeCellsProperty())
        root.appendCompatible(["riscv-virtio"])

        # One memory node per range; the index in _global_mem_ranges is used
        # as the NUMA node id (0 = local, 1 = remote).
        for idx, mem_range in enumerate(self._global_mem_ranges):
            node = FdtNode("memory@%x" % int(mem_range.start))
            node.append(FdtPropertyStrings("device_type", ["memory"]))
            node.append(
                FdtPropertyWords(
                    "reg",
                    state.addrCells(mem_range.start)
                    + state.sizeCells(mem_range.size()),
                )
            )
            # adding the NUMA node information so that the OS can identify all
            # the NUMA ranges.
            node.append(FdtPropertyWords("numa-node-id", [idx]))
            root.append(node)

        # See Documentation/devicetree/bindings/riscv/cpus.txt for details.
        cpus_node = FdtNode("cpus")
        cpus_state = FdtState(addr_cells=1, size_cells=0)
        cpus_node.append(cpus_state.addrCellsProperty())
        cpus_node.append(cpus_state.sizeCellsProperty())
        # Used by the CLINT driver to set the timer frequency. Value taken from
        # RISC-V kernel docs (Note: freedom-u540 is actually 1MHz)
        cpus_node.append(FdtPropertyWords("timebase-frequency", [100000000]))

        for i, core in enumerate(self.get_processor().get_cores()):
            node = FdtNode(f"cpu@{i}")
            node.append(FdtPropertyStrings("device_type", "cpu"))
            node.append(FdtPropertyWords("reg", state.CPUAddrCells(i)))
            # The CPUs are also associated to the NUMA nodes. All the CPUs are
            # bound to the first NUMA node.
            node.append(FdtPropertyWords("numa-node-id", [0]))
            node.append(FdtPropertyStrings("mmu-type", "riscv,sv48"))
            node.append(FdtPropertyStrings("status", "okay"))
            node.append(FdtPropertyStrings("riscv,isa", "rv64imafdc"))
            # TODO: Should probably get this from the core.
            freq = self.clk_domain.clock[0].frequency
            node.append(FdtPropertyWords("clock-frequency", freq))
            node.appendCompatible(["riscv"])
            # NOTE(review): this result is overwritten a few lines below. The
            # call is kept in case phandle() registers the key as a side
            # effect, which would affect phandle numbering -- confirm.
            int_phandle = state.phandle(f"cpu@{i}.int_state")
            node.appendPhandle(f"cpu@{i}")

            int_node = FdtNode("interrupt-controller")
            int_state = FdtState(interrupt_cells=1)
            int_phandle = int_state.phandle(f"cpu@{i}.int_state")
            int_node.append(int_state.interruptCellsProperty())
            int_node.append(FdtProperty("interrupt-controller"))
            int_node.appendCompatible("riscv,cpu-intc")
            int_node.append(FdtPropertyWords("phandle", [int_phandle]))

            node.append(int_node)
            cpus_node.append(node)

        root.append(cpus_node)

        soc_node = FdtNode("soc")
        soc_state = FdtState(addr_cells=2, size_cells=2)
        soc_node.append(soc_state.addrCellsProperty())
        soc_node.append(soc_state.sizeCellsProperty())
        soc_node.append(FdtProperty("ranges"))
        soc_node.appendCompatible(["simple-bus"])

        # CLINT node
        clint = self.platform.clint
        clint_node = clint.generateBasicPioDeviceNode(
            soc_state, "clint", clint.pio_addr, clint.pio_size
        )
        int_extended = list()
        for i, core in enumerate(self.get_processor().get_cores()):
            phandle = soc_state.phandle(f"cpu@{i}.int_state")
            int_extended.append(phandle)
            int_extended.append(0x3)
            int_extended.append(phandle)
            int_extended.append(0x7)
        clint_node.append(
            FdtPropertyWords("interrupts-extended", int_extended)
        )
        # NUMA information is also associated with the CLINT controller.
        # In this board, the objective to associate one NUMA node to the CPUs
        # and the other node with no CPUs. To generalize this, an additional
        # CLINT controller has to be created on this board, which will make it
        # completely NUMA, instead of just disaggregated NUMA-like board.
        clint_node.append(FdtPropertyWords("numa-node-id", [0]))
        clint_node.appendCompatible(["riscv,clint0"])
        soc_node.append(clint_node)

        # PLIC node
        plic = self.platform.plic
        plic_node = plic.generateBasicPioDeviceNode(
            soc_state, "plic", plic.pio_addr, plic.pio_size
        )

        int_state = FdtState(addr_cells=0, interrupt_cells=1)
        plic_node.append(int_state.addrCellsProperty())
        plic_node.append(int_state.interruptCellsProperty())

        phandle = int_state.phandle(plic)
        plic_node.append(FdtPropertyWords("phandle", [phandle]))
        # Similar to the CLINT interrupt controller, another PLIC controller is
        # required to make this board a general NUMA like board.
        plic_node.append(FdtPropertyWords("numa-node-id", [0]))
        plic_node.append(FdtPropertyWords("riscv,ndev", [plic.n_src - 1]))

        int_extended = list()
        for i, core in enumerate(self.get_processor().get_cores()):
            phandle = state.phandle(f"cpu@{i}.int_state")
            int_extended.append(phandle)
            int_extended.append(0xB)
            int_extended.append(phandle)
            int_extended.append(0x9)

        plic_node.append(FdtPropertyWords("interrupts-extended", int_extended))
        plic_node.append(FdtProperty("interrupt-controller"))
        plic_node.appendCompatible(["riscv,plic0"])

        soc_node.append(plic_node)

        # PCI
        pci_state = FdtState(
            addr_cells=3, size_cells=2, cpu_cells=1, interrupt_cells=1
        )
        pci_node = FdtNode("pci")

        if int(self.platform.pci_host.conf_device_bits) == 8:
            pci_node.appendCompatible("pci-host-cam-generic")
        elif int(self.platform.pci_host.conf_device_bits) == 12:
            pci_node.appendCompatible("pci-host-ecam-generic")
        else:
            m5.fatal("No compatibility string for the set conf_device_width")

        pci_node.append(FdtPropertyStrings("device_type", ["pci"]))

        # Cell sizes of child nodes/peripherals
        pci_node.append(pci_state.addrCellsProperty())
        pci_node.append(pci_state.sizeCellsProperty())
        pci_node.append(pci_state.interruptCellsProperty())
        # PCI address for CPU
        pci_node.append(
            FdtPropertyWords(
                "reg",
                soc_state.addrCells(self.platform.pci_host.conf_base)
                + soc_state.sizeCells(self.platform.pci_host.conf_size),
            )
        )

        # Ranges mapping
        # For now some of this is hard coded, because the PCI module does not
        # have a proper full understanding of the memory map, but adapting the
        # PCI module is beyond the scope of what I'm trying to do here.
        # Values are taken from the ARM VExpress_GEM5_V1 platform.
        ranges = []
        # Pio address range
        ranges += self.platform.pci_host.pciFdtAddr(space=1, addr=0)
        ranges += soc_state.addrCells(self.platform.pci_host.pci_pio_base)
        ranges += pci_state.sizeCells(0x10000)  # Fixed size

        # AXI memory address range
        ranges += self.platform.pci_host.pciFdtAddr(space=2, addr=0)
        ranges += soc_state.addrCells(self.platform.pci_host.pci_mem_base)
        ranges += pci_state.sizeCells(0x40000000)  # Fixed size
        pci_node.append(FdtPropertyWords("ranges", ranges))

        # Interrupt mapping
        plic_handle = int_state.phandle(plic)
        int_base = self.platform.pci_host.int_base

        interrupts = []

        for i in range(int(self.platform.pci_host.int_count)):
            interrupts += self.platform.pci_host.pciFdtAddr(
                device=i, addr=0
            ) + [int(i) + 1, plic_handle, int(int_base) + i]

        pci_node.append(FdtPropertyWords("interrupt-map", interrupts))

        int_count = int(self.platform.pci_host.int_count)
        # n & (n - 1) is non-zero exactly when n is not a power of two.
        # `fatal` is reached through the m5 module, the only place it is
        # available in this file.
        if int_count & (int_count - 1):
            m5.fatal("PCI interrupt count should be power of 2")

        intmask = self.platform.pci_host.pciFdtAddr(
            device=int_count - 1, addr=0
        ) + [0x0]
        pci_node.append(FdtPropertyWords("interrupt-map-mask", intmask))

        if self.platform.pci_host._dma_coherent:
            pci_node.append(FdtProperty("dma-coherent"))

        soc_node.append(pci_node)

        # UART node
        uart = self.platform.uart
        uart_node = uart.generateBasicPioDeviceNode(
            soc_state, "uart", uart.pio_addr, uart.pio_size
        )
        uart_node.append(
            FdtPropertyWords("interrupts", [self.platform.uart_int_id])
        )
        uart_node.append(FdtPropertyWords("clock-frequency", [0x384000]))
        uart_node.append(
            FdtPropertyWords("interrupt-parent", soc_state.phandle(plic))
        )
        uart_node.appendCompatible(["ns8250"])
        soc_node.append(uart_node)

        # VirtIO MMIO disk node
        disk = self.disk
        disk_node = disk.generateBasicPioDeviceNode(
            soc_state, "virtio_mmio", disk.pio_addr, disk.pio_size
        )
        disk_node.append(FdtPropertyWords("interrupts", [disk.interrupt_id]))
        disk_node.append(
            FdtPropertyWords("interrupt-parent", soc_state.phandle(plic))
        )
        disk_node.appendCompatible(["virtio,mmio"])
        soc_node.append(disk_node)

        # VirtIO MMIO rng node
        rng = self.rng
        rng_node = rng.generateBasicPioDeviceNode(
            soc_state, "virtio_mmio", rng.pio_addr, rng.pio_size
        )
        rng_node.append(FdtPropertyWords("interrupts", [rng.interrupt_id]))
        rng_node.append(
            FdtPropertyWords("interrupt-parent", soc_state.phandle(plic))
        )
        rng_node.appendCompatible(["virtio,mmio"])
        soc_node.append(rng_node)

        root.append(soc_node)

        fdt = Fdt()
        fdt.add_rootnode(root)
        fdt.writeDtsFile(os.path.join(outdir, "device.dts"))
        fdt.writeDtbFile(os.path.join(outdir, "device.dtb"))

    @overrides(KernelDiskWorkload)
    def get_default_kernel_args(self) -> List[str]:
        """Return the default kernel arguments.

        The guest is started with ``init=/bin/bash``.
        """
        return ["console=ttyS0", "root={root_value}", "init=/bin/bash", "rw"]

    @overrides(AbstractBoard)
    def _connect_things(self) -> None:
        """Connects all the components to the board.

        The order of this board is always:

        1. Connect the memory (local, then remote).
        2. Connect the cache hierarchy.
        3. Connect the processor.

        Developers may build upon this assumption when creating components.

        Notes
        -----

        * The processor is incorporated after the cache hierarchy due to a bug
        noted here: https://gem5.atlassian.net/browse/GEM5-1113. Until this
        bug is fixed, this ordering must be maintained.
        * Once this function is called `_connect_things_called` *must* be set
        to `True`.

        :raises Exception: If this function has already been called.
        """

        if self._connect_things_called:
            raise Exception(
                "The `_connect_things` function has already been called."
            )

        # Incorporate the memory into the motherboard.
        self.get_local_memory().incorporate_memory(self)
        self.get_remote_memory().incorporate_memory(self)

        # Incorporate the cache hierarchy for the motherboard.
        if self.get_cache_hierarchy():
            self.get_cache_hierarchy().incorporate_cache(self)

        # Incorporate the processor into the motherboard.
        self.get_processor().incorporate_processor(self)

        self._connect_things_called = True

    @overrides(AbstractBoard)
    def _post_instantiate(self):
        """Called to set up anything needed after m5.instantiate"""
        self.get_processor()._post_instantiate()
        if self.get_cache_hierarchy():
            self.get_cache_hierarchy()._post_instantiate()
        self.get_local_memory()._post_instantiate()
        self.get_remote_memory()._post_instantiate()
class RiscvSstBoard(RiscvBoard):
    """
    A RISC-V full-system board with a local memory plus a remote memory
    that is reached through an ``OutgoingRequestBridge``.

    At a high-level, this is based on the HiFive Unmatched board from SiFive.
    This board assumes that you will be booting Linux.

    The remote memory itself is modelled outside gem5 (on SST's side); gem5
    only needs its size so that the address range and the device tree can be
    generated. The two ranges are exported to the guest as two NUMA nodes.

    **Limitations**
    * Only works with classic caches
    """

    def __init__(
        self,
        clk_freq: str,
        processor: AbstractProcessor,
        local_memory: AbstractMemorySystem,
        remote_memory_size: str,
        cache_hierarchy: AbstractCacheHierarchy,
        instance: int,
    ) -> None:
        """
        :param clk_freq: Clock frequency of the board.
        :param processor: The processor; must use the RISC-V ISA.
        :param local_memory: The memory system simulated inside gem5.
        :param remote_memory_size: Size of the remote memory, as a string.
        :param cache_hierarchy: The cache hierarchy used by the board.
        :param instance: Index of this node; used to offset the start
            address of the remote range (see ``_setup_memory_ranges``).
        :raises Exception: If the processor's ISA is not RISC-V.
        """
        # These must be set before calling the parent constructor because
        # `_setup_memory_ranges` reads them.
        # NOTE(review): this assumes the parent constructor triggers
        # `_setup_memory_ranges` -- confirm against AbstractBoard.
        self._localMemory = local_memory
        # Since the remote memory is defined in SST's side, we only need the
        # size of this memory while setting up stuff from Gem5's side.
        self._remoteMemory = OutgoingRequestBridge()
        self._remoteMemorySize = remote_memory_size
        self._instanceCount = instance
        super().__init__(
            clk_freq=clk_freq,
            processor=processor,
            memory=local_memory,
            cache_hierarchy=cache_hierarchy,
        )
        self.local_memory = local_memory
        # Do not setup the remote memory here.
        self.remote_memory = self._remoteMemory

        if processor.get_isa() != ISA.RISCV:
            # The original message was missing the space between "the" and
            # "RISCV" because of implicit string concatenation.
            raise Exception(
                "The RISCVBoard requires a processor using the "
                "RISCV ISA. Current processor ISA: "
                f"'{processor.get_isa().name}'."
            )

    @overrides(AbstractSystemBoard)
    def get_memory(self) -> AbstractMemorySystem:
        """Not supported on this board.

        The board has two memories; use ``get_local_memory`` or
        ``get_remote_memory`` instead.

        :raises NotImplementedError: Always.
        """
        raise NotImplementedError

    def get_local_memory(self) -> AbstractMemorySystem:
        """Get the local memory (RAM) connected to the board.

        :returns: The local memory system passed to the constructor.
        """
        return self._localMemory

    def get_remote_memory(self) -> OutgoingRequestBridge:
        """Get the bridge standing in for the remote memory.

        :returns: The ``OutgoingRequestBridge`` created in the constructor.
        """
        return self._remoteMemory

    @overrides(AbstractSystemBoard)
    def get_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
        """Get the memory ports of the local memory only.

        :returns: Whatever the local memory's ``get_mem_ports`` returns.
        """
        return self.get_local_memory().get_mem_ports()

    def get_remote_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
        """Get the (address ranges, port) pair of the remote memory bridge.

        NOTE(review): the first tuple element is the bridge's
        ``physical_address_ranges`` (assigned a list of ranges in
        ``_setup_memory_ranges``), not a single ``AddrRange`` as the
        annotation suggests -- confirm what callers expect.

        :returns: A one-element list holding that pair.
        """
        remote_memory = self.get_remote_memory()
        return [(remote_memory.physical_address_ranges, remote_memory.port)]

    @overrides(AbstractSystemBoard)
    def _setup_memory_ranges(self):
        """Compute and assign the local and remote address ranges.

        Sets ``_local_mem_ranges``, ``_remote_mem_ranges`` and
        ``_global_mem_ranges`` (local first, remote second), then pushes the
        ranges to the local memory and to the remote bridge.
        """
        # The memory has to be set up for both the memory ranges. There is
        # one local memory range, close to the host machine, and the other
        # range is pure memory, far from the host.
        local_memory = self.get_local_memory()
        remote_memory = self.get_remote_memory()

        # Both ranges are laid out relative to this base address.
        dram_base = 0x80000000

        local_mem_size = local_memory.get_size()
        # This is a string
        remote_mem_size = self._remoteMemorySize

        self._local_mem_ranges = [
            AddrRange(start=dram_base, size=local_mem_size)
        ]

        # The remote memory starts after the local memory ends. Instead of
        # placing it immediately after the local range, a per-instance hole
        # is kept in the memory map to simulate multiple nodes without using
        # a translator simobject.
        remote_memory_start_addr = (
            dram_base + local_mem_size + self._instanceCount * 0x80000000
        )
        self._remote_mem_ranges = [
            AddrRange(start=remote_memory_start_addr, size=remote_mem_size)
        ]

        # Using a _global_ memory range to keep track of all the memory
        # ranges. This is used to generate the dtb for this machine; the
        # index in this list becomes the NUMA node id.
        self._global_mem_ranges = [
            self._local_mem_ranges[0],
            self._remote_mem_ranges[0],
        ]

        # Setting the memory ranges for both of the memories. The bridge is
        # not an AbstractMemorySystem, so its range parameter is assigned
        # directly instead of through `set_memory_range`.
        local_memory.set_memory_range(self._local_mem_ranges)
        remote_memory.physical_address_ranges = self._remote_mem_ranges

    @overrides(RiscvBoard)
    def generate_device_tree(self, outdir: str) -> None:
        """Creates the dtb and dts files.

        Creates two files in the outdir: 'device.dtb' and 'device.dts'.
        Every range in ``_global_mem_ranges`` becomes a memory node whose
        ``numa-node-id`` is its index in that list; CPUs, CLINT and PLIC are
        all bound to NUMA node 0.

        NOTE(review): the statement order below is deliberately unchanged;
        phandle values presumably depend on the order of the ``phandle``
        calls -- confirm before reordering.

        :param outdir: Directory to output the files
        """

        state = FdtState(addr_cells=2, size_cells=2, cpu_cells=1)
        root = FdtNode("/")
        root.append(state.addrCellsProperty())
        root.append(state.sizeCellsProperty())
        root.appendCompatible(["riscv-virtio"])

        for idx, mem_range in enumerate(self._global_mem_ranges):
            node = FdtNode("memory@%x" % int(mem_range.start))
            node.append(FdtPropertyStrings("device_type", ["memory"]))
            node.append(
                FdtPropertyWords(
                    "reg",
                    state.addrCells(mem_range.start)
                    + state.sizeCells(mem_range.size()),
                )
            )
            # adding the NUMA node information so that the OS can identify all
            # the NUMA ranges.
            node.append(FdtPropertyWords("numa-node-id", [idx]))
            root.append(node)

        # See Documentation/devicetree/bindings/riscv/cpus.txt for details.
        cpus_node = FdtNode("cpus")
        cpus_state = FdtState(addr_cells=1, size_cells=0)
        cpus_node.append(cpus_state.addrCellsProperty())
        cpus_node.append(cpus_state.sizeCellsProperty())
        # Used by the CLINT driver to set the timer frequency. Value taken from
        # RISC-V kernel docs (Note: freedom-u540 is actually 1MHz)
        cpus_node.append(FdtPropertyWords("timebase-frequency", [100000000]))

        for i, _core in enumerate(self.get_processor().get_cores()):
            node = FdtNode(f"cpu@{i}")
            node.append(FdtPropertyStrings("device_type", "cpu"))
            node.append(FdtPropertyWords("reg", state.CPUAddrCells(i)))
            # The CPUs are also associated to the NUMA nodes. All the CPUs are
            # bound to the first NUMA node.
            node.append(FdtPropertyWords("numa-node-id", [0]))
            node.append(FdtPropertyStrings("mmu-type", "riscv,sv48"))
            node.append(FdtPropertyStrings("status", "okay"))
            node.append(FdtPropertyStrings("riscv,isa", "rv64imafdc"))
            # TODO: Should probably get this from the core.
            freq = self.clk_domain.clock[0].frequency
            node.append(FdtPropertyWords("clock-frequency", freq))
            node.appendCompatible(["riscv"])
            # This value is overwritten below; the call is kept in place so
            # the sequence of phandle requests stays as it was.
            int_phandle = state.phandle(f"cpu@{i}.int_state")
            node.appendPhandle(f"cpu@{i}")

            int_node = FdtNode("interrupt-controller")
            int_state = FdtState(interrupt_cells=1)
            int_phandle = int_state.phandle(f"cpu@{i}.int_state")
            int_node.append(int_state.interruptCellsProperty())
            int_node.append(FdtProperty("interrupt-controller"))
            int_node.appendCompatible("riscv,cpu-intc")
            int_node.append(FdtPropertyWords("phandle", [int_phandle]))

            node.append(int_node)
            cpus_node.append(node)

        root.append(cpus_node)

        soc_node = FdtNode("soc")
        soc_state = FdtState(addr_cells=2, size_cells=2)
        soc_node.append(soc_state.addrCellsProperty())
        soc_node.append(soc_state.sizeCellsProperty())
        soc_node.append(FdtProperty("ranges"))
        soc_node.appendCompatible(["simple-bus"])

        # CLINT node
        clint = self.platform.clint
        clint_node = clint.generateBasicPioDeviceNode(
            soc_state, "clint", clint.pio_addr, clint.pio_size
        )
        int_extended = list()
        for i, _core in enumerate(self.get_processor().get_cores()):
            phandle = soc_state.phandle(f"cpu@{i}.int_state")
            int_extended.append(phandle)
            int_extended.append(0x3)
            int_extended.append(phandle)
            int_extended.append(0x7)
        clint_node.append(
            FdtPropertyWords("interrupts-extended", int_extended)
        )
        # NUMA information is also associated with the CLINT controller.
        # In this board, the objective to associate one NUMA node to the CPUs
        # and the other node with no CPUs. To generalize this, an additional
        # CLINT controller has to be created on this board, which will make it
        # completely NUMA, instead of just disaggregated NUMA-like board.
        clint_node.append(FdtPropertyWords("numa-node-id", [0]))
        clint_node.appendCompatible(["riscv,clint0"])
        soc_node.append(clint_node)

        # PLIC node
        plic = self.platform.plic
        plic_node = plic.generateBasicPioDeviceNode(
            soc_state, "plic", plic.pio_addr, plic.pio_size
        )

        int_state = FdtState(addr_cells=0, interrupt_cells=1)
        plic_node.append(int_state.addrCellsProperty())
        plic_node.append(int_state.interruptCellsProperty())

        phandle = int_state.phandle(plic)
        plic_node.append(FdtPropertyWords("phandle", [phandle]))
        # Similar to the CLINT interrupt controller, another PLIC controller is
        # required to make this board a general NUMA like board.
        plic_node.append(FdtPropertyWords("numa-node-id", [0]))
        plic_node.append(FdtPropertyWords("riscv,ndev", [plic.n_src - 1]))

        int_extended = list()
        for i, _core in enumerate(self.get_processor().get_cores()):
            phandle = state.phandle(f"cpu@{i}.int_state")
            int_extended.append(phandle)
            int_extended.append(0xB)
            int_extended.append(phandle)
            int_extended.append(0x9)

        plic_node.append(FdtPropertyWords("interrupts-extended", int_extended))
        plic_node.append(FdtProperty("interrupt-controller"))
        plic_node.appendCompatible(["riscv,plic0"])

        soc_node.append(plic_node)

        # PCI
        pci_state = FdtState(
            addr_cells=3, size_cells=2, cpu_cells=1, interrupt_cells=1
        )
        pci_node = FdtNode("pci")

        if int(self.platform.pci_host.conf_device_bits) == 8:
            pci_node.appendCompatible("pci-host-cam-generic")
        elif int(self.platform.pci_host.conf_device_bits) == 12:
            pci_node.appendCompatible("pci-host-ecam-generic")
        else:
            m5.fatal("No compatibility string for the set conf_device_width")

        pci_node.append(FdtPropertyStrings("device_type", ["pci"]))

        # Cell sizes of child nodes/peripherals
        pci_node.append(pci_state.addrCellsProperty())
        pci_node.append(pci_state.sizeCellsProperty())
        pci_node.append(pci_state.interruptCellsProperty())
        # PCI address for CPU
        pci_node.append(
            FdtPropertyWords(
                "reg",
                soc_state.addrCells(self.platform.pci_host.conf_base)
                + soc_state.sizeCells(self.platform.pci_host.conf_size),
            )
        )

        # Ranges mapping
        # For now some of this is hard coded, because the PCI module does not
        # have a proper full understanding of the memory map, but adapting the
        # PCI module is beyond the scope of what I'm trying to do here.
        # Values are taken from the ARM VExpress_GEM5_V1 platform.
        ranges = []
        # Pio address range
        ranges += self.platform.pci_host.pciFdtAddr(space=1, addr=0)
        ranges += soc_state.addrCells(self.platform.pci_host.pci_pio_base)
        ranges += pci_state.sizeCells(0x10000)  # Fixed size

        # AXI memory address range
        ranges += self.platform.pci_host.pciFdtAddr(space=2, addr=0)
        ranges += soc_state.addrCells(self.platform.pci_host.pci_mem_base)
        ranges += pci_state.sizeCells(0x40000000)  # Fixed size
        pci_node.append(FdtPropertyWords("ranges", ranges))

        # Interrupt mapping
        plic_handle = int_state.phandle(plic)
        int_base = self.platform.pci_host.int_base

        interrupts = []

        for i in range(int(self.platform.pci_host.int_count)):
            interrupts += self.platform.pci_host.pciFdtAddr(
                device=i, addr=0
            ) + [int(i) + 1, plic_handle, int(int_base) + i]

        pci_node.append(FdtPropertyWords("interrupt-map", interrupts))

        int_count = int(self.platform.pci_host.int_count)
        # A power of two has a single bit set, so `n & (n - 1)` is zero.
        if int_count & (int_count - 1):
            # `fatal` alone is not in scope in this file; only `m5` is
            # imported, so go through the module as done above.
            m5.fatal("PCI interrupt count should be power of 2")

        intmask = self.platform.pci_host.pciFdtAddr(
            device=int_count - 1, addr=0
        ) + [0x0]
        pci_node.append(FdtPropertyWords("interrupt-map-mask", intmask))

        if self.platform.pci_host._dma_coherent:
            pci_node.append(FdtProperty("dma-coherent"))

        soc_node.append(pci_node)

        # UART node
        uart = self.platform.uart
        uart_node = uart.generateBasicPioDeviceNode(
            soc_state, "uart", uart.pio_addr, uart.pio_size
        )
        uart_node.append(
            FdtPropertyWords("interrupts", [self.platform.uart_int_id])
        )
        uart_node.append(FdtPropertyWords("clock-frequency", [0x384000]))
        uart_node.append(
            FdtPropertyWords("interrupt-parent", soc_state.phandle(plic))
        )
        uart_node.appendCompatible(["ns8250"])
        soc_node.append(uart_node)

        # VirtIO MMIO disk node
        disk = self.disk
        disk_node = disk.generateBasicPioDeviceNode(
            soc_state, "virtio_mmio", disk.pio_addr, disk.pio_size
        )
        disk_node.append(FdtPropertyWords("interrupts", [disk.interrupt_id]))
        disk_node.append(
            FdtPropertyWords("interrupt-parent", soc_state.phandle(plic))
        )
        disk_node.appendCompatible(["virtio,mmio"])
        soc_node.append(disk_node)

        # VirtIO MMIO rng node
        rng = self.rng
        rng_node = rng.generateBasicPioDeviceNode(
            soc_state, "virtio_mmio", rng.pio_addr, rng.pio_size
        )
        rng_node.append(FdtPropertyWords("interrupts", [rng.interrupt_id]))
        rng_node.append(
            FdtPropertyWords("interrupt-parent", soc_state.phandle(plic))
        )
        rng_node.appendCompatible(["virtio,mmio"])
        soc_node.append(rng_node)

        root.append(soc_node)

        fdt = Fdt()
        fdt.add_rootnode(root)
        fdt.writeDtsFile(os.path.join(outdir, "device.dts"))
        fdt.writeDtbFile(os.path.join(outdir, "device.dtb"))

    @overrides(AbstractBoard)
    def _connect_things(self) -> None:
        """Connects all the components to the board.

        The order of this board is always:

        1. Connect the local memory.
        2. Create the link to the remote memory and attach it to the cache
           hierarchy's memory bus.
        3. Connect the cache hierarchy.
        4. Connect the processor.

        Developers may build upon this assumption when creating components.

        Notes
        -----

        * The processor is incorporated after the cache hierarchy due to a bug
        noted here: https://gem5.atlassian.net/browse/GEM5-1113. Until this
        bug is fixed, this ordering must be maintained.
        * Once this function is called `_connect_things_called` *must* be set
        to `True`.

        :raises Exception: If called more than once, or if the board has no
            cache hierarchy to attach the remote memory link to.
        """

        if self._connect_things_called:
            raise Exception(
                "The `_connect_things` function has already been called."
            )

        # The remote link is attached to the cache hierarchy's `membus`, so a
        # hierarchy is mandatory here. The original code dereferenced it
        # before checking that it exists.
        cache_hierarchy = self.get_cache_hierarchy()
        if not cache_hierarchy:
            raise Exception(
                "The remote memory link requires a cache hierarchy with a "
                "`membus` to connect to."
            )

        # Incorporate the memory into the motherboard.
        self.get_local_memory().incorporate_memory(self)

        # A NoncoherentXBar sits between the memory bus and the outgoing
        # bridge.
        # NOTE(review): the latency and width values are taken as-is from
        # the original code; their rationale is not visible here.
        self.remote_link = NoncoherentXBar(
            frontend_latency=0,
            forward_latency=0,
            response_latency=750,
            width=256,
        )
        self.get_remote_memory().port = self.remote_link.mem_side_ports
        cache_hierarchy.membus.mem_side_ports = (
            self.remote_link.cpu_side_ports
        )

        # Incorporate the cache hierarchy for the motherboard.
        cache_hierarchy.incorporate_cache(self)

        # Incorporate the processor into the motherboard.
        self.get_processor().incorporate_processor(self)

        self._connect_things_called = True

    @overrides(RiscvBoard)
    def get_default_kernel_args(self) -> List[str]:
        """Get the default kernel command line arguments.

        This class previously defined this method twice; the earlier
        definition (``init=/bin/bash``) was dead code because this later one
        replaced it, so only the effective definition is kept.

        :returns: The list of kernel arguments.
        """
        return [
            "console=ttyS0",
            "root={root_value}",
            "rw",
            "init=/root/gem5-init.sh",
        ]

    @overrides(AbstractBoard)
    def _post_instantiate(self):
        """Called to set up anything needed after m5.instantiate"""
        # NOTE(review): this print looks like debug output, but it is kept
        # in case something outside gem5 consumes it -- confirm before
        # removing.
        print(
            "__ranges__", self.get_remote_memory().physical_address_ranges[0]
        )
        self.get_processor()._post_instantiate()
        if self.get_cache_hierarchy():
            self.get_cache_hierarchy()._post_instantiate()
        self.get_local_memory()._post_instantiate()
        # The remote memory is a plain bridge SimObject, so there is no
        # `_post_instantiate` hook to call on it.
7.01689 6.978944]\n", + " [6.008993 5.932203 7.050845 6.956073]\n", + " [1.881847 1.874148 2.094582 2.086612]]\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWoAAAEICAYAAAB25L6yAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAYGElEQVR4nO3df3RV5Z3v8ffHAMaiFYVchxIxuVoVf6KNFiu1nYo/iz+Hi+CPltY2HatdVudeq6OzBq/t7b0zVtuqvV1UrdpWQCuIo60/WnWqtxUMioIGf0xlagBrCCiCIgrf+8fZiUk4IeeQc5In4fNai0XO3s9++O69wufs85znPEcRgZmZpWuHvi7AzMy2zkFtZpY4B7WZWeIc1GZmiXNQm5klzkFtZpY4B7WZWeIc1JYMSRdJapD0vqTbCmhfLelXklokrZe0QNLETm0ek9Qsaa2k5ySd1mn/SEm3SFop6R1JSyVdLWlotj+yvtdJWi7pOkkV7fbt06m/6ZJ+2eOLYdaOg9pSsgL4LnBrdw0l7Q48CWwEDgRGANcDd0qa1K7pxcDIiPg4UA/8UtLIdn38CdgJOCoidgGOA4YBe7fr49CI2Bk4Fjgb+HoPztGsaIP6ugCzVhExB0BSHVDdTfNLgHXA+RGxOds2U9Jo4AeS7omc59v/E8BgYE9gJXAp8A5wbmsfEfE6uXDPV99SSU8AB23TCZptI99RW391HHBPu5BudRcwGti3dYOk+yVtAOYDjwMN2a4JwJw8feQl6QDgs8CzPSvdrDi+o7b+agS5u+LOVrbb/xJAREyUNJhcMI9pF8zDu+ijs2ckbQJWAzcDP+9J4WbFclBbf7UKGJln+8h2+9tExAfAbyVdLOnViLgPaOmij84Oj4hX82zfRG4opb3BwAcF9GlWMA99WH/1O+BMSZ1/hycDrwMvd3HcID56o/B3wBl5+ijUX4CaTttqgf/cxv7M8nJQWzIkDZJUCVQAFZIqJXX1qu96YFfgFkl/k7WdClwJ/I+ICEn7SzpJ0k6SBks6FzgG+Pesj+uAjwO3S9orq2FUNgXvkAJKng1clU0T3EHSBOAU4Nfbeg3M8nFQW0quAt4DLgfOzX6+Kl/DiGgBxgOVwIvkhjEuBc6LiNlZMwHTgTeBZnKzOc6KiGeyPlYDnyE3VDFf0jvA74G3gXxDHZ39T+CP5KYJrgH+BTgnIpYUc9Jm3ZG/OMDMLG2+ozYzS5yD2swscQ5qM7PEOajNzBJXlg+8jBgxImpqasrRtZnZgLRw4cJVEVGVb19ZgrqmpoaGhobuG5qZGQCSuvyglIc+zMwS56A2M0ucg9rMLHFePc/MtuqDDz6gqamJDRs29HUpA0JlZSXV1dUMHtx54cWuOajNbKuamprYZZddqKmpQVJfl9OvRQQtLS00NTVRW1tb8HEe+jCzrdqwYQPDhw93SJeAJIYPH170qxMHtZl1yyFdOttyLbsNakn7SVrU7s9aSd/elgLNzKx43Y5RR8RLwFgASRXAcmBuecsys1TVXP5ASftb9r+/2G0bSVx66aX84Ac/AODaa69l3bp1TJ8+nWnTpjFx4kQmTZrU1n7nnXdm3bp1LFu2jNraWq688kq++93vArBq1SpGjhzJN77xDW688ca2Y8aOHcv+++/PrFmzuqzj+9//PrfccgsVFRX8+Mc/5oQTTtjW0y5KsW8mHgv8R0T4q4ZsYJu+a4n7e7u0/W1ndtxxR+bMmcMVV1zBiBEjijq2traWBx54oC2o7777bg488MAObRobG9m0aRNPPPEE69evZ+jQoVv08+KLLzJr1ixeeOEFVqxYwYQJE3j55ZepqKjY9hMrULFBP
QWYmW+HpHqgHmD06NE9LMtsYCn1XSgUdic6UAwaNIj6+nquv/56vve97xV17Mc+9jHGjBlDQ0MDdXV1zJ49m8mTJ7NixYq2NjNnzuS8886jsbGRefPmcfbZZ2/Rz7x585gyZQo77rgjtbW17LPPPixYsICjjjqqx+fXnYKDWtIQ4FTginz7I2IGMAOgrq7OXxtjZoVZ8Wz3bWIzF545njETzuHkc+pZ+dZ7vPvuBp5veos16zfyny3reb7prbbmmwOeb3qL5SvXsuGDTYybMJEbfnY752yu5L0Pg02Vw1i17rW29rNnz+aRRx5h6dKl3HDDDXmDevny5YwbN67tcXV1NcuXL+/RqReqmFkfJwHPRMRfy1WMmVlXPr7Lzkz8uynceeuMjjvyzKLoPLPi6M9P4KknHuPB++ZwwilndNjX0NDAiBEjGD16NMceeyzPPvssq1evLnn9PVFMUE+li2EPM7PecO75F3Dv7F/w3nvvtm0bttvurH37rbbHb69Zw7Ddd+9w3OAhQxhz8FjumHETx518Wod9M2fOZOnSpdTU1LD33nuzdu1a7rnnHubOncvYsWMZO3YsDQ0NjBo1itdff73tuKamJkaNGlWeE+2koKCWNBQ4DphT3nLMzLq26267cfzE05k76xdt2+qOOpqH/m0uH2zcCMC8u+/kiKM+u8WxX6q/kG9fMZ1dd9utbdvmzZu56667WLx4McuWLWPZsmXMmzePmTNncsYZZ7Bo0SIWLVpEXV0dp556KrNmzeL999/ntdde45VXXuHII48s/0lT4Bh1RKwHhpe5FjPrBwp+E7OQsedt8KX6i5h1281tjz834UQaFz/HlJM/T0VFBdV71XLV96/b4rh99hvDPvuN6bDtiSeeYNSoUXziE59o23bMMcfw4osvsnLlSkaOHNm2/cADD2Ty5MkccMABDBo0iJtuuqlXZnwAKKL07/vV1dWFvzjA+rUST8+r2XBnSfuD3pv10djYyJgxY7pv2FkZgvr5zYWvj1GIQ6qHlbS/QuW7ppIWRkRdvvb+CLmZWeIc1GZmiXNQm5klzkFtZpY4B7WZWeIc1GZmifNXcZlZcUq9smD949020ajDubT+XL78TzcAcPtPb+Ddd9dzwaWX80+XfJNjJpzAcV/86BOH4/ar5qmXmlj++l84+TOH8vVv/QMXXXYVAGtWtzDhU/vzd+dMY9ZtP2s7plTLnF555ZXccccdrFmzhnXr1hV6FbbKd9RmlrwddxzCnN8+yprVLUUfO2r0Xvzh0YfbHj9y/73sve/+Hdp0XuY0n/bLnD744IN885vfZNOmTVu0O+WUU1iwYEHRdW6Ng9rMkjeoooL6c87klz/7SdHHVlbuxH/dZ19eeC73AZyH/m0ux088vUOb1mVOjz/+eObNm5e3n66WOe1s3LhxHT7RWAoOajPrFy6cNpnf3Hs376wt/ksYTjz1TB68bw5vrGhih4oKqvboGKSzZ89mypQpTJ06lZkz8689t3z5cvbcc8+2x6kuc2pm1me8zKmZWT/gZU7NzBKX6jKnxx57bFmHQTw9z8yKU+gX9W4ny5xu3ryZV199ld2zu/jLLruMO++8k3fffZfq6mq+9rWvMX369B6d84Bf5rQvvtreBgAvc9rGy5xu3ZIlS7j11lu57rotnxy64mVOzcx60UEHHVRUSG8LB7WZWeIc1GZmiXNQm5klrtBvIR8m6deSlkpqlHRUuQszM7OcQqfn/Qh4MCImSRoCfKyMNZmZWTvdBrWkXYFjgGkAEbER2FjesswsVQfffnBJ+1t83B3dtklhmdOWlhYmTZrE008/zbRp07jxxht7ctpFKeSOuhZoBn4u6VBgIXBxRORfC7CnSr3WLaWfv2pmvat1mdNTL2xht92HF3Vs6zKnrUFdyDKnQ4cO3aKfyspKrrnmGpYsWcKSJUu2/WS2QSFj1IOAw4H/GxGHAeuByzs3klQvqUFSQ3Nzc4nLNLPtWQrLnA4dOpTx4
8dTWVlZdA09VUhQNwFNETE/e/xrcsHdQUTMiIi6iKirqqoqZY1mZn2+zGlf6nboIyLekPS6pP0i4iXgWODF8pdm26zUw0eFru1gVkbtlzntcFdb4DKnN137vxheVbXVZU5HjRrFV7/6VVavXt22dkcKCp318S3gV9mMjz8DXylfSZaaUq+XAl4zxbbNuedfwJSTP8dpk89p21bsMqdzf/8Ujz/y27Z97Zc5BdqWOR0xYgRXX301ADfffDN1dXmX4egVBQV1RCwC+q5KMzM6LnN6+lnnArllTn91y085ddJUBg8ZstVlTuvGHd3lMqetK+g99thjXHPNNTz66KOcccYZW/TTF7zMqZkVZfGXFxfWcIAtcwpQU1PD2rVr2bhxI/feey8PP/wwBxxwQInPcEsOajNL3rpX/l/bz8Or/gvzX1nRYf/fX/Id/v6S72xx3Kg9RzPn93/aYvtpk8/mtMlnc0j1MJ566qkO+yoqKnjjjTfy1rFs2bJtqL7nvNaHmVniHNRmZolzUJtZt8rxTVDbq225lg5qM9uqyspKWlpaHNYlEBG0tLQU/elGv5loZltVXV1NU1MTRS8N8dabJa/lryV+smh8Z6eS9leIyspKqqurizrGQW1mWzV48GBqa7fhS2Wnjyt5LSeV+EuC+8sHrzz0YWaWOAe1mVniHNRmZolzUJuZJc5BbWaWOAe1mVniHNRmZolzUJuZJc5BbWaWOAe1mVniHNRmZolzUJuZJc5BbWaWuIJWz5O0DHgH2AR8GBH+RnIzs15SzDKnfxsRq8pWiZmZ5eWhDzOzxBUa1AE8LGmhpPp8DSTVS2qQ1FD0N0GYmVmXCg3q8RFxOHAScKGkYzo3iIgZEVEXEXVVVVUlLdLMbHtWUFBHxPLs7zeBucCR5SzKzMw+0m1QSxoqaZfWn4HjgSXlLszMzHIKmfWxBzBXUmv7OyPiwbJWZWZmbboN6oj4M3BoL9RiZmZ5eHqemVniHNRmZolzUJuZJc5BbWaWOAe1mVniHNRmZolzUJuZJc5BbWaWOAe1mVniHNRmZolzUJuZJc5BbWaWOAe1mVniHNRmZolzUJuZJc5BbWaWOAe1mVniHNRmZolzUJuZJc5BbWaWuIKDWlKFpGcl3V/OgszMrKNi7qgvBhrLVYiZmeVXUFBLqga+CNxc3nLMzKyzQu+ofwhcBmzuqoGkekkNkhqam5tLUZuZmVFAUEuaCLwZEQu31i4iZkREXUTUVVVVlaxAM7PtXSF31EcDp0paBswCviDpl2WtyszM2nQb1BFxRURUR0QNMAV4NCLOLXtlZmYGeB61mVnyBhXTOCIeBx4vSyVmZpaX76jNzBLnoDYzS5yD2swscQ5qM7PEOajNzBLnoDYzS5yD2swscQ5qM7PEOajNzBLnoDYzS5yD2swscQ5qM7PEOajNzBLnoDYzS5yD2swscQ5qM7PEOajNzBLnoDYzS5yD2swscQ5qM7PEOajNzBLXbVBLqpS0QNJzkl6QdHVvFGZmZjmDCmjzPvCFiFgnaTDwpKTfRsRTZa7NzMwoIKgjIoB12cPB2Z8oZ1FmZvaRgsaoJVVIWgS8CTwSEfPztKmX1CCpobm5ucRlmpltvwoK6ojYFBFjgWrgSEkH5WkzIyLqIqKuqqqqxGWamW2/ipr1ERFvAY8BJ5alGjMz20Ihsz6qJA3Lft4JOA5YWua6zMwsU8isj5HA7ZIqyAX7XRFxf3nLMjOzVoXM+ngeOKwXajEzszz8yUQzs8Q5qM3MEuegNjNLnIPazCxxDmozs8Q5qM3MEuegNjNLnIPazCxxDmozs8Q5qM3MEuegNjNLnIPazCxxDmozs8Q5qM3MEuegNjNLnIPazCxxDmozs8Q5qM3MEuegNjNLnIPazCxx3X65raQ9gTuAPYAAZkTEj8pdmJlt3cG3H1zS/hZ/eXFJ+7PS6TaogQ+Bf4iIZyTtAiyU9EhEvFjm2szMyqq/PNl1O/QRESsj4pns53eAR
mBUWaoxM7MtFDVGLakGOAyYn2dfvaQGSQ3Nzc0lKs/MzAoOakk7A/cA346ItZ33R8SMiKiLiLqqqqpS1mhmtl0rKKglDSYX0r+KiDnlLcnMzNrrNqglCbgFaIyI68pfkpmZtVfIrI+jgfOAxZIWZdv+MSJ+U7aqElbqd4lh+5wW1V/ebTdLQbdBHRFPAuqFWszMLA9/MtHMLHEOajOzxDmozcwS56A2M0ucg9rMLHEOajOzxDmozcwS56A2M0ucg9rMLHEOajOzxDmozcwS56A2M0ucg9rMLHEOajOzxDmozcwS56A2M0ucg9rMLHEOajOzxDmozcwS56A2M0tct0Et6VZJb0pa0hsFmZlZR4XcUd8GnFjmOszMrAvdBnVE/AFY3Qu1mJlZHiUbo5ZUL6lBUkNzc3OpujUz2+6VLKgjYkZE1EVEXVVVVam6NTPb7nnWh5lZ4hzUZmaJK2R63kzgT8B+kpoknV/+sszMrNWg7hpExNTeKMTMzPLz0IeZWeIc1GZmiXNQm5klzkFtZpY4B7WZWeIc1GZmiXNQm5klzkFtZpY4B7WZWeIc1GZmiXNQm5klzkFtZpY4B7WZWeIc1GZmiXNQm5klzkFtZpY4B7WZWeIc1GZmiXNQm5klzkFtZpY4B7WZWeIKCmpJJ0p6SdKrki4vd1FmZvaRboNaUgVwE3AScAAwVdIB5S7MzMxyCrmjPhJ4NSL+HBEbgVnAaeUty8zMWikitt5AmgScGBFfyx6fB3w6Ii7q1K4eqM8e7ge8VPpyy2oEsKqvi+jnfA1Lw9ex5/rjNdwrIqry7RhUqn8hImYAM0rVX2+T1BARdX1dR3/ma1gavo49N9CuYSFDH8uBPds9rs62mZlZLygkqJ8GPimpVtIQYApwX3nLMjOzVt0OfUTEh5IuAh4CKoBbI+KFslfW+/rtsE1CfA1Lw9ex5wbUNez2zUQzM+tb/mSimVniHNRmZokb0EEt6W8kzZL0H5IWSvqNpH37uq7+RNKVkl6Q9LykRZI+XeTxNZKWlKu+/kjS6ZJC0v5d7H9c0hZTyyRNk3Rj+StMj6Th2e/fIklvSFre7vGQrM2pxS5xIWmZpBHlqbp0SjaPOjWSBMwFbo+IKdm2Q4E9gJf7srb+QtJRwETg8Ih4P/uFHtLHZQ0EU4Ens7//uY9r6RciogUYCyBpOrAuIq5t3S9pUETcxwCdkTaQ76j/FvggIn7auiEingOelPSvkpZIWizpLABJn5f0B0kPZAtQ/VTSDpK+KumHrX1I+rqk63v9bPrGSGBVRLwPEBGrImKFpCMk/VHSc5IWSNolu3N+QtIz2Z/PdO5MUkV27Z/O7tC/0etn1Mck7QyMB84nN9UVSTtlr/waJc0FdmrX/iuSXpa0ADi6T4pOlKTbsv+n84F/af+KQ9IpkuZLelbS7yTtkW0fLunh7FXizYD68hwKNZCD+iBgYZ7tZ5J7Zj4UmAD8q6SR2b4jgW+RW3xq76ztXcApkgZnbb4C3Fq+spPyMLBnFhQ/kfS57GXmbODiiGi9hu8BbwLHRcThwFnAj/P0dz7wdkQcARwBfF1Sba+cSTpOAx6MiJeBFkmfAi4A3o2IMeTusD8FkP1eXk0uoMeT+720jqqBz0TEpZ22PwmMi4jDyK1PdFm2/Z+BJyPiQHKvuEf3WqU9MGCHPrZiPDAzIjYBf5X07+RCYy2wICL+DCBpJjA+In4t6VFgoqRGYHBELO6r4ntTRKzLguSz5F6hzAa+B6yMiKezNmsBJA0FbpQ0FtgE5Hsv4HjgkGz9GIBdgU8Cr5XzPBIzFfhR9vOs7PE+ZE9sEfG8pOez/Z8GHo+IZgBJs8l/Xbdnd2f/lzurBmZnT3ZD+Oh37BhyN2BExAOS1vROmT0zkIP6BWBSt6066jypvPXxzcA/AkuBn/ewrn4l+0/wOPC4pMXAhV00vQT4K7lXKjsAG/K0EfCtiHioDKUmT9LuwBeAgyUFuQ+QBfBsnxbWv63vY
vsNwHURcZ+kzwPTe6ugchjIQx+PAjtmq/oBIOkQ4C3grGy8tIrcM+yCrMmR2UfldyD38v1JgIiYT269k7OBmb13Cn1L0n6SPtlu01igERgp6YiszS6SBpG7O14ZEZuB88iFUGcPARe0DiNJ2je7E99eTAJ+ERF7RURNROxJ7k5vIbnfLSQdBByStZ8PfC4bVx0M/Le+KLqf2pWP1iT6crvtf+Cja30SsFsv17VNBuwddUSEpDOAH0r6Drk7vGXAt4GdgefI3c1cFhFvZFOlngZuJPdS9DFyY1it7gLGRkS/eKlUIjsDN0gaBnwIvEpuKdufZ9t3Ijc+PQH4CXCPpC8BD5L/TudmoAZ4JpuV0wycXt5TSMpU4P902nYPcBiwUza01kj23kpErMxmOPyJ3A3Got4qdACYDtydDW08CrS+F3I1MFPSC8Afgb/0TXnF8UfIM9nLo/8eERO72H8/cH1E/L436zIzG8hDHyUhaZikl4H3HNJm1hd8R21mljjfUZuZJc5BbWaWOAe1mVniHNRmZolzUJuZJe7/A8g7kct7rA82AAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# O3 Results\n", + "\n", + "print(\"Results \", plot_idx)\n", + "plot_idx += 1\n", + "\n", + "plt.title(\"1 O3CPU\")\n", + "f = open(home_path + \"/m5out_node_O3_0/system.platform.terminal\", \"r\")\n", + "exp = 0\n", + "kernel = 0\n", + "for lines in f.read().split(\"\\n\"):\n", + " if \"Bandwidth\" in lines:\n", + " data[exp][kernel] = float(lines.split(\" \")[1])\n", + " kernel += 1\n", + " if kernel % 4 == 0:\n", + " exp += 1\n", + " kernel = 0\n", + "\n", + "print([i for i in range(4)], data)\n", + "\n", + "x_axis = np.arange(len(kernels))\n", + "plt.xticks(x_axis, kernels)\n", + "\n", + "plt.bar(x_axis, data[0], bar_width, label=\"NUMA-0\")\n", + "plt.bar(x_axis - bar_width, data[1], bar_width, label=\"NUMA-0,1\")\n", + "plt.bar(x_axis + bar_width, data[2], bar_width, label=\"NUMA-1\")\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Results 13\n", + "[0, 1, 2, 3] [[10.324512 10.065496 11.409131 11.371691]\n", + " [10.31722 10.129058 11.51054 11.468607]\n", + " [ 7.576464 7.489263 8.33215 8.295167]]\n" + ] + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXAAAAEICAYAAABGaK+TAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAXt0lEQVR4nO3de5RU5Z3u8e9jAzZeRlR6HKQ13eMVvAST1uBljCfgJQY1RoaAxkhuJDF6Ep1ZXg4zR3KSrGROjGYUc7KIGjWJDaIiJs4ojsZRE0UbQZCLl4mMNhdtWhAREYXf+aM22DQNXV21+/LC81mrF1273v3Wr/Zqnnrrrb3fUkRgZmbp2aW7CzAzs9I4wM3MEuUANzNLlAPczCxRDnAzs0Q5wM3MEuUANzNLlAPckiHpEEnrJP22nXaDJd0v6W1J70j6o6QTWtzfX9KfJDVLWiXpKUknturjUElTJa3I+pkr6XJJFZJqJIWkNdnPYklXZfttuq9Xq/5uk/TDPI+HmQPcUnIT8Oz2Gkg6CPgTMA+oBfYHpgEzJB2fNVsDfBWoAvYG/gX4/abQzfqYCbwOHBURewF/D9QBe7Z4uH4RsQcwBvjfks7I40maFcsBbkmQNBpYBTzSTtMJwFMRMT4i3oqIdyLiBuA3FIKaiFgXES9GxEZAwAYKQb5P1sf3gT9HxOURsSzb58WIOD8iVrV+wIh4CpgPHFnm0zTrEAe49XiS/gr4P8DlRTQ/FZjaxva7gBMl9W3R71xgHXA/cHNEvJndNRy4u8jalE2/HAHMLmYfs7z0ar+JWbf7AXBLRDRKaq9tf2BZG9uXURiw7AMsAYiIoyVVAucCfVq03XcbfbS2AghgOXBVRDwiqaaI/cxy4QC3Hk3SEAoj4mOK3GUFMKCN7QOAjcDKlhsjYh1QL2mhpDkR8TzQvI0+WusfER+22rbpdu8Wv2+6/UERfZoVzVMo1tOdAtQAr0laDvwjcJ6k57bR/j8ofODY2igKc+Nrt7Ffb+BvW/RxXon1LqMQ1DWtttcC/11in2ZtcoBbTzcJOAgYkv38EngAOH0b7b8PnCDpR5L2kbSnpEuBLwNXAkgaKukkSX0k9ZV0JbAfhTNPAK7J+vippL/J9jlY0m8l9dtesRGxAbgH+JGkfSX1ljQGGAz8e4nHwKxNDnDr0SJibUQs3/RD4RTAdRHRtI32LwMnAR8HFlMYEZ8HnB4Rf8qa7UrhlMRmCvPhZwKfi4ilWR//BRxPYRQ9X9LbFEK5AXiniLIvBt4C5gJvApdk/b/RsWdvtn3yFzqYmaXJI3Azs0Q5wM3MEuUANzNLVLsBLulWSW9KeqHFtp9KWpQt8DOtvU/mzcwsf+1+iCnpZAqf/N8REUdm204DHo2IDyVtWl/iyvYerH///lFTU1N20WZmO5NZs2atiIiq1tvbvRIzIh5vfXlwRMxocfNpYGQxRdTU1NDQ0FBMUzMzy0hq8yKwPObAv8p2LlCQNE5Sg6SGpqY2T901M7MSlBXgksZTWO/hd9tqExGTIqIuIuqqqrZ6B2BmZiUqeTErSWOBEcCw8NVAZmZdrqQAz7555Arg09tZHMjMdmAffPABjY2NrFu3rrtL2WFUVlZSXV1N7969i2rfboBLqqewIlx/SY0UFvq5msJ6Eg9n6zM/HRHfKrVoM0tPY2Mje+65JzU1NRSxTru1IyJobm6msbGR2traovYp5iyUMW1svqWjxZnZjmXdunUO7xxJYt9996UjJ3v4SkwzK5nDO18dPZ4OcDOzRPkr1cwsFzVXPZBrf4t/8rl220ji8ssv52c/+xkA1157LWvWrGHChAmMHTuWESNGMHLkR9cZ7rHHHqxZs4bFixdTW1vL+PHj+eEPfwjAihUrGDBgAN/85jeZOHHi5n2GDBnC4YcfzuTJk7dZx49//GNuueUWKioquOGGGzj99G1930i+HOBmHTFhr07o8+38+9wRLJ3dbpNdd+3DvVMnc/XVV9O/f/8OdV9
bW8sDDzywOcCnTp3KEUccsUWbhQsXsmHDBp544gneffdddt999636WbBgAZMnT2b+/PksXbqU4cOH89JLL1FRUdGhekrhKRQzS1avigrGXfAFrr/++g7vu9tuuzFo0KDNy3tMmTKFUaNGbdGmvr6eCy+8kNNOO43p06e32c/06dMZPXo0u+66K7W1tRx88ME888wzHX8yJfAI3KybdcfUw47kO2NHMWj4BZx5wTiWrXqPtWvXMbdxFSvfXc9/N7/L3MZVm9tuDJjbuIoly1az7oMNDB0+ght/dTsXbKzkvQ+DDZX9WLHm1c3tp0yZwsMPP8yiRYu48cYbOf/887d6/CVLljB06NDNt6urq1myZEmnPudNPAI3s6T91Z57MOK80dx566Qt72jjjI7WZ3mceMpwnn7ijzx4/72cfta5W9zX0NBA//79OfDAAxk2bBizZ8/mrbfeyr3+cjjAzSx5X/rat7lvym94772PLgzvt/c+rH571ebbb69cSb999tliv959+jDoqCHcMekmTj3znC3uq6+vZ9GiRdTU1HDQQQexevVq7rnnHqZNm8aQIUMYMmQIDQ0NDBw4kNdff33zfo2NjQwcOLBznmgrDnAzS95ee+/NaSM+z7TJv9m8re74E3no99P4YP16AKZPvZNjj/+7rfb98rjv8L2rJ7DX3ntv3rZx40buuusu5s2bx+LFi1m8eDHTp0+nvr6ec889lzlz5jBnzhzq6uo4++yzmTx5Mu+//z6vvvoqL7/8Mscdd1znP2k8B25mOenQ3HsRZ5h01JfHXcLk227efPvTw89g4bznGX3mKVRUVFD9sVr+6cfXbbXfwYcN4uDDBm2x7YknnmDgwIHsv//+m7edfPLJLFiwgGXLljFgwIDN24844ghGjRrF4MGD6dWrFzfddFOXnIECRXwjT57q6urCX+hgSeuE0whr1t2Za39d9SHmwoULGTRoUPsN25JzgM/dWNzaIcU6urpfrv11RFvHVdKsiKhr3dZTKGZmiXKAm5klygFuZpYoB7iZWaIc4GZmiXKAm5klyueBm1k+8j7Fctxj7TbRwE9w+bgvcdE/3wjA7b+8kbVr3+Xbl1/FP192MScPP51TP/fRFZZDD6vm6RcbWfL6a5x5wsf5xqX/wCVX/BMAK99qZvgnD+e8C8Yy+bZfbd4nr+Vkx48fzx133MHKlStZs2ZNsUdhuzwCN7Nk7bprH+7990dZ+VZzh/cdeODHePzRGZtvP/yH+zjo0MO3aNN6Odm2tFxO9sEHH+Tiiy9mw4YNW7U766yzcl+l0AFuZsnatJzsb3/1iw7vW1nZl789+FDmP1+4qOih30/jtBGf36JNnsvJDh06dIsrOPPgADezpH1n7Cj+7b6pvLO641+MccbZX+DB++9l+dJGdqmooGq/LQN2ypQpjB49mjFjxlBfX99mH0uWLOGAAw7YfNvLyZqZFcnLyZqZJczLyZqZJaqnLic7bNiwTp1OSec0wpxPUcp7BTjY+b7KymwLHfly5p1gOdmNGzfyyiuvsE826r/iiiu48847Wbt2LdXV1Xz9619nwoQJZT3ndJaTdYBbT+DlZDfzcrLb98ILL3Drrbdy3XVbv2hsj5eTNTPrZkceeWSHw7uj2g1wSbdKelPSCy227SPpYUkvZ//uvb0+zMwsf8XMgd8GTATuaLHtKuCRiPiJpKuy21fmX57lqhPe/ndo3tPMctVugEfE45JqWm0+Bzgl+/124DEc4DulmqseyLU/f45gVrxS58D3i4hl2e/Lgf221VDSOEkNkhqamppKfDgzM2ut7A8xo3AayzZPZYmISRFRFxF1VVVV5T6cmZllSj0P/A1JAyJimaQBwJt5FmVm6Tnq9qNy7W/eqXe026YnLCfb3NzMyJEjefbZZxk7diwTJ04s52l3SKkj8PuBi7LfLwLaXqbLzKwT9YTlZCsrK/nBD37Atdde2+EaylXMaYT1wFPAYZIaJX0N+AlwqqSXgeH
ZbTOzLtUTlpPdfffdOemkk6isrOxwDeVqN8AjYkxEDIiI3hFRHRG3RERzRAyLiEMiYnhE9Kwlusxsp9Hdy8l2J1+JaWZJ83KyZmYJ687lZLuTA9zMktedy8l2p3SWkzWzHm3eRfOKb7wDLScLUFNTw+rVq1m/fj333XcfM2bMYPDgwTk/w605wM0sWWte/tPm3/et+mtmvrx0i/u/ddmVfOuyrVf5GHjAgdz7yFNbbT9n1PmcM+p8jq7ux9NPP73FfRUVFSxfvrzNOhYvXlxC9eXzFIqZWaIc4GZmiXKAm1nJuvIbvXYGHT2eDnAzK0llZSXNzc0O8ZxEBM3NzR26otMfYppZSaqrq2lsbKSkZaJX5bv+3Rs5v4gsfKdvrv0Vq7Kykurq6qLbO8DNrCS9e/emtrbELxOeMDTXWj6b6BdDl8tTKGZmiXKAm5klygFuZpYoB7iZWaIc4GZmiXKAm5klygFuZpYoB7iZWaIc4GZmiXKAm5klygFuZpYoB7iZWaIc4GZmifJqhGY7mKNuPyrX/jr0ZcXWpRzgZmat5P0iCJ3zQugpFDOzRJUV4JIukzRf0guS6iUV/11AZmZWlpIDXNJA4H8CdRFxJFABjM6rMDMz275yp1B6AX0l9QJ2A5aWX5KZmRWj5ACPiCXAtcBrwDLg7YiY0bqdpHGSGiQ1lPTlp2Zm1qZyplD2Bs4BaoH9gd0lfal1u4iYFBF1EVFXVVVVeqVmZraFcqZQhgOvRkRTRHwA3AuckE9ZZmbWnnIC/DVgqKTdJAkYBizMpywzM2tPOXPgM4G7geeAeVlfk3Kqy8zM2lHWlZgRcQ1wTU61mJlZB/hKTDOzRHktlBx5ESEz60oegZuZJcojcOtR/C7GrHgegZuZJcoBbmaWKAe4mVmiHOBmZolygJuZJcoBbmaWKAe4mVmiHOBmZolygJuZJcoBbmaWKAe4mVmiHOBmZolygJuZJcoBbmaWKAe4mVmiHOBmZolygJuZJcoBbmaWKAe4mVmiHOBmZolygJuZJcoBbmaWKAe4mVmiygpwSf0k3S1pkaSFko7PqzAzM9u+XmXu/6/AgxExUlIfYLccajIzsyKUHOCS9gJOBsYCRMR6YH0+ZZmZWXvKmUKpBZqAX0uaLelmSbu3biRpnKQGSQ1NTU1lPJyZmbVUToD3Aj4B/L+IOAZ4F7iqdaOImBQRdRFRV1VVVcbDmZlZS+UEeCPQGBEzs9t3Uwh0MzPrAiUHeEQsB16XdFi2aRiwIJeqzMysXeWehXIp8LvsDJS/AF8pvyQzMytGWQEeEXOAunxKMTOzjvCVmGZmiXKAm5klygFuZpYoB7iZWaIc4GZmiXKAm5klygFuZpYoB7iZWaIc4GZmiXKAm5klygFuZpYoB7iZWaIc4GZmiXKAm5klygFuZpYoB7iZWaIc4GZmiXKAm5klygFuZpYoB7iZWaIc4GZmiXKAm5klygFuZpYoB7iZWaIc4GZmiXKAm5klygFuZpaosgNcUoWk2ZL+kEdBZmZWnDxG4N8FFubQj5mZdUBZAS6pGvgccHM+5ZiZWbHKHYH/HLgC2LitBpLGSWqQ1NDU1FTmw5mZ2SYlB7ikEcCbETFre+0iYlJE1EVEXVVVVakPZ2ZmrZQzAj8ROFvSYmAy8BlJv82lKjMza1fJAR4RV0dEdUTUAKOBRyPiS7lVZmZm2+XzwM3MEtUrj04i4jHgsTz6MjOz4ngEbmaWKAe4mVmiHOBmZolygJuZJcoBbmaWKAe4mVmiHOBmZolygJuZJcoBbmaWKAe4mVmiHOBmZolygJuZJcoBbmaWKAe4mVmiHOBmZolygJuZJcoBbmaWKAe4mVmiHOBmZolygJuZJcoBbmaWKAe4mVmiHOBmZolygJuZJcoBbmaWKAe4mVmiHOBmZokqOcAlHSDpj5IWSJov6bt5FmZmZtvXq4x9PwT+ISKek7QnMEvSwxGxIKfazMxsO0oegUfEsoh4Lvv9HWAhMDC
vwszMbPtymQOXVAMcA8xs475xkhokNTQ1NeXxcGZmRg4BLmkP4B7gexGxuvX9ETEpIuoioq6qqqrchzMzs0xZAS6pN4Xw/l1E3JtPSWZmVoxyzkIRcAuwMCKuy68kMzMrRjkj8BOBC4HPSJqT/ZyZU11mZtaOkk8jjIgnAeVYi5mZdYCvxDQzS5QD3MwsUQ5wM7NEOcDNzBLlADczS5QD3MwsUQ5wM7NEOcDNzBLlADczS5QD3MwsUQ5wM7NEOcDNzBLlADczS5QD3MwsUQ5wM7NEOcDNzBLlADczS5QD3MwsUQ5wM7NEOcDNzBLlADczS5QD3MwsUQ5wM7NEOcDNzBLlADczS5QD3MwsUQ5wM7NElRXgks6Q9KKkVyRdlVdRZmbWvpIDXFIFcBPwWWAwMEbS4LwKMzOz7StnBH4c8EpE/CUi1gOTgXPyKcvMzNqjiChtR2kkcEZEfD27fSHwqYi4pFW7ccC47OZhwIull9tt+gMruruIxPkYls/HsHypHsOPRURV6429OvtRI2ISMKmzH6czSWqIiLruriNlPobl8zEs3452DMuZQlkCHNDidnW2zczMukA5Af4scIikWkl9gNHA/fmUZWZm7Sl5CiUiPpR0CfAQUAHcGhHzc6usZ0l6CqiH8DEsn49h+XaoY1jyh5hmZta9fCWmmVmiHOBmZonaaQNc0t9ImizpvyTNkvRvkg7t7rpSIWm8pPmS5kqaI+lTHdy/RtILnVVfiiR9XlJIOnwb9z8maatT4CSNlTSx8yvseSTtm/39zZG0XNKSFrf7ZG3O7uhSH5IWS+rfOVXnp9PPA++JJAmYBtweEaOzbR8H9gNe6s7aUiDpeGAE8ImIeD/7Q+/TzWXtCMYAT2b/XtPNtSQhIpqBIQCSJgBrIuLaTfdL6hUR97ODniG3s47A/wfwQUT8ctOGiHgeeFLSTyW9IGmepC8CSDpF0uOSHsgW7/qlpF0kfVXSzzf1Iekbkq7v8mfT9QYAKyLifYCIWBERSyUdK+nPkp6X9IykPbOR9hOSnst+TmjdmaSK7Lg/m43ov9nlz6ibSdoDOAn4GoVTcpHUN3uXuFDSNKBvi/ZfkfSSpGeAE7ul6B5K0m3Z/9GZwP9t+Q5F0lmSZkqaLek/JO2Xbd9X0ozsXeXNgLrzORRrZw3wI4FZbWz/AoVX848Dw4GfShqQ3XcccCmFhbsOytreBZwlqXfW5ivArZ1Xdo8xAzggC5BfSPp09nZ1CvDdiNh0/N4D3gROjYhPAF8Ebmijv68Bb0fEscCxwDck1XbJM+k5zgEejIiXgGZJnwS+DayNiEEURuSfBMj+Jr9PIbhPovA3aVuqBk6IiMtbbX8SGBoRx1BYv+mKbPs1wJMRcQSFd+cHdlmlZdgpp1C24ySgPiI2AG9I+k8KgbIaeCYi/gIgqR44KSLulvQoMELSQqB3RMzrruK7SkSsyQLm7yi8m5kC/AhYFhHPZm1WA0jaHZgoaQiwAWjrc4bTgKOz9XUA9gIOAV7tzOfRw4wB/jX7fXJ2+2CyF7yImCtpbnb/p4DHIqIJQNIU2j6uO7Op2f/j1qqBKdmLYB8++hs7mcKgjIh4QNLKrimzPDtrgM8HRrbbakutT5jfdPtm4H8Bi4Bfl1lXMrL/HI8Bj0maB3xnG00vA96g8K5mF2BdG20EXBoRD3VCqT2epH2AzwBHSQoKF8YFMLtbC0vbu9vYfiNwXUTcL+kUYEJXFdQZdtYplEeBXbOVEgGQdDSwCvhiNidbReFV+ZmsyXHZsgG7UJgKeBIgImZSWBPmfKC+655C95F0mKRDWmwaAiwEBkg6Nmuzp6ReFEbTyyJiI3AhhXBq7SHg25umoiQdmo3cdxYjgd9ExMcioiYiDqAwMpxF4e8KSUcCR2ftZwKfzuZtewN/3x1FJ2ovPlqz6aIW2x/no2P9WWDvLq6rJDvlCDwiQtK5wM8lXUlhVLgY+B6wB/A8hRHQFRGxPDut61lgIoW3tX+
kME+2yV3AkIhI4m1XDvYAbpTUD/gQeIXCksG/zrb3pTD/PRz4BXCPpC8DD9L2yOhmoAZ4LjtDqAn4fOc+hR5lDPAvrbbdAxwD9M2m5xaSfW4TEcuyMy6eojDomNNVhe4AJgBTsymSR4FNn7V8H6iXNB/4M/Ba95TXMb6UvgjZW61/jIgR27j/D8D1EfFIV9ZlZju3nXUKJReS+kl6CXjP4W1mXc0jcDOzRHkEbmaWKAe4mVmiHOBmZolygJuZJcoBbmaWqP8Pz0bgU31bkeQAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# O3 Results\n", + "\n", + "print(\"Results \", plot_idx)\n", + "plot_idx += 1\n", + "\n", + "plt.title(\"4 O3CPU\")\n", + "f = open(home_path + \"/m5out_node_0/system.platform.terminal\", \"r\")\n", + "exp = 0\n", + "kernel = 0\n", + "for lines in f.read().split(\"\\n\"):\n", + " if \"Bandwidth\" in lines:\n", + " data[exp][kernel] = float(lines.split(\" \")[1])\n", + " kernel += 1\n", + " if kernel % 4 == 0:\n", + " exp += 1\n", + " kernel = 0\n", + "\n", + "print([i for i in range(4)], data)\n", + "\n", + "x_axis = np.arange(len(kernels))\n", + "plt.xticks(x_axis, kernels)\n", + "\n", + "plt.bar(x_axis, data[0], bar_width, label=\"NUMA-0\")\n", + "plt.bar(x_axis - bar_width, data[1], bar_width, label=\"NUMA-0,1\")\n", + "plt.bar(x_axis + bar_width, data[2], bar_width, label=\"NUMA-1\")\n", + "plt.legend()\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/disaggregated_memory_setup/test_board_sst.py b/disaggregated_memory_setup/test_board_sst.py new file mode 100644 index 0000000000..459adfface --- /dev/null +++ b/disaggregated_memory_setup/test_board_sst.py @@ -0,0 +1,234 @@ +# Copyright (c) 2021-2023 The Regents of the University of California +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +from m5.objects import Port, AddrRange, OutgoingRequestBridge +from gem5.resources.resource import AbstractResource +from gem5.components.boards.kernel_disk_workload import KernelDiskWorkload +from gem5.components.boards.abstract_system_board import AbstractSystemBoard +from gem5.components.processors.abstract_processor import AbstractProcessor +from gem5.components.memory.abstract_memory_system import AbstractMemorySystem +from gem5.components.cachehierarchies.abstract_cache_hierarchy import ( + AbstractCacheHierarchy, +) +from gem5.components.processors.abstract_generator import AbstractGenerator +from gem5.components.boards.test_board import TestBoard + +from typing import List, Optional, Sequence, Tuple +from gem5.utils.override import overrides + + +class TestBoardForSST(TestBoard): + """This board implements a test board for SST/External Memory devices. It + is assumed that the device has two memories. + """ + + def __init__( + self, + clk_freq: str, + generator: AbstractGenerator, + remote_memory_size: str, + memory: Optional[AbstractMemorySystem], + cache_hierarchy: Optional[AbstractCacheHierarchy], + ): + self._localMemory = None + if memory is not None: + self._localMemory = memory + self._remoteMemory = OutgoingRequestBridge() + self._remoteMemorySize = remote_memory_size + super().__init__( + clk_freq=clk_freq, + generator=generator, + memory=self._localMemory, + cache_hierarchy=cache_hierarchy, + ) + self.local_memory = self._localMemory + self.remote_memory = self._remoteMemory + + @overrides(AbstractSystemBoard) + def get_memory(self) -> "AbstractMemory": + """Get the memory (RAM) connected to the board. + + :returns: The memory system. + """ + raise NotImplementedError + + def get_local_memory(self) -> "AbstractMemory": + """Get the memory (RAM) connected to the board. + :returns: The local memory system. + """ + return self._localMemory + + def get_remote_memory(self) -> "AbstractMemory": + """Get the memory (RAM) connected to the board. 
+ :returns: The remote memory system. + """ + # raise Exception("cannot call this method") + return self._remoteMemory + + @overrides(AbstractSystemBoard) + def get_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]: + return self.get_local_memory().get_mem_ports() + + def get_remote_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]: + + return [ + ( + self.get_remote_memory().physical_address_ranges, + self.get_remote_memory().port, + ) + ] + + @overrides(AbstractSystemBoard) + def _setup_memory_ranges(self): + # The local memory can be empty in this case. + local_memory = None + remote_memory = self.get_remote_mem() + # This is a string + remote_mem_size = self._remoteMemorySize + # using a _global_ memory range to keep a track of all the memory + # ranges. This is used to generate the dtb for this machine + start_addr_for_remote = 0x0 + self._global_mem_ranges = [] + if self.get_local_memory() is not None: + local_memory = self.get_local_memory() + self._global_mem_ranges.append( + AddrRange(start=0x0, size=local_memory.get_size()) + ) + start_addr_for_remote = local_memory.get_size() + local_memory.set_memory_range(self._global_mem_ranges[0]) + + self._global_mem_ranges.append( + AddrRange(start=start_addr_for_remote, size=remote_mem_size()) + ) + + remote_memory.physical_address_ranges = self._global_mem_ranges[-1] + + # the memory has to be setup for both the memory ranges. there is one + # local memory range, close to the host machine and the other range is + # pure memory, far from the host. + + # self._local_mem_ranges = [ + # AddrRange(start=0x80000000, size=local_mem_size) + # ] + + # The remote memory starts where the local memory ends. Therefore it + # has to be offset by the local memory's size. + # self._remote_mem_ranges = [ + # AddrRange(start=0x80000000 + local_mem_size, size=remote_mem_size) + # ] + + # keeping a hole in the mem ranges to simulate multiple nodes without + # using a translator simobject. 
+ # remote_memory_start_addr = 0x80000000 + local_mem_size + self._instanceCount * 0x80000000 + # self._remote_mem_ranges = [ + # AddrRange(start=remote_memory_start_addr, size=remote_mem_size) + # ] + + # self._global_mem_ranges.append(self._local_mem_ranges[0]) + # self._global_mem_ranges.append(self._remote_mem_ranges[0]) + + # setting the memory ranges for both of the memory ranges. + # local_memory.set_memory_range(self._local_mem_ranges) + # remote_memory.physical_address_ranges = self._remote_mem_ranges + # remote_memory.set_memory_range(self._remote_mem_ranges) + + @overrides(TestBoard) + def _connect_things(self) -> None: + """Connects all the components to the board. + + The order of this board is always: + + 1. Connect the memory. + 2. Connect the cache hierarchy. + 3. Connect the processor. + + Developers may build upon this assumption when creating components. + + Notes + ----- + + * The processor is incorporated after the cache hierarchy due to a bug + noted here: https://gem5.atlassian.net/browse/GEM5-1113. Until this + bug is fixed, this ordering must be maintained. + * Once this function is called `_connect_things_called` *must* be set + to `True`. + """ + + if self._connect_things_called: + raise Exception( + "The `_connect_things` function has already been called." + ) + + # Incorporate the memory into the motherboard. + if self.get_local_memory() is not None: + self.get_local_memory().incorporate_memory(self) + + # # Add a NoncoherentXBar here + + # self.remote_link = NoncoherentXBar( + # frontend_latency = 0, + # forward_latency = 0, + # response_latency = 0, + # width = 64 + # ) + # self.get_remote_memory().port = self.remote_link.mem_side_ports + # self.get_cache_hierarchy().membus.mem_side_ports = self.remote_link.cpu_side_ports + + self.get_remote_memory().port = ( + self.get_cache_hierarchy().membus.mem_side_ports + ) + # self.get_remote_memory().incorporate_memory(self) + + # Incorporate the cache hierarchy for the motherboard. 
+ if self.get_cache_hierarchy(): + self.get_cache_hierarchy().incorporate_cache(self) + + # Incorporate the processor into the motherboard. + self.get_processor().incorporate_processor(self) + + self._connect_things_called = True + + if not self.get_cache_hierarchy(): + # If we have no caches, then there must be a one-to-one + # connection between the generators and the memories. + assert len(self.get_processor().get_cores()) == 1 + # assert len(self.get_memory().get_mem_ports()) == 1 + self.get_processor().get_cores()[0].connect_dcache( + self.get_remote_memory().get_remote_mem_ports()[0][1] + ) + + @overrides(TestBoard) + def _post_instantiate(self): + """Called to set up anything needed after m5.instantiate""" + print( + "__ranges__", self.get_remote_memory().physical_address_ranges[0] + ) + self.get_processor()._post_instantiate() + if self.get_cache_hierarchy(): + self.get_cache_hierarchy()._post_instantiate() + if self.get_local_memory() is not None: + self.get_local_memory()._post_instantiate() + # self.get_remote_memory()._post_instantiate() diff --git a/disaggregated_memory_setup/traffic_gen_sst.py b/disaggregated_memory_setup/traffic_gen_sst.py new file mode 100644 index 0000000000..269ff8c13e --- /dev/null +++ b/disaggregated_memory_setup/traffic_gen_sst.py @@ -0,0 +1,136 @@ +# Copyright (c) 2021-2023 The Regents of the University of California +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +This script is used for running a traffic generator connected to a memory +device. It supports linear and random accesses with a configurable amount +of write traffic. + +By default, this script runs with one channel (two pseudo channels) of HBM2 +and this channel is driven with 32GiB/s of traffic for 1ms.
+""" + +import argparse + +from m5.objects import MemorySize, AddrRange + +# from gem5.components.boards.test_board import TestBoard + +from test_board_sst import TestBoardForSST + +from gem5.components.processors.linear_generator import LinearGenerator +from gem5.components.processors.random_generator import RandomGenerator + +from gem5.components.memory.hbm import HighBandwidthMemory +from gem5.components.memory.dram_interfaces.hbm import HBM_2000_4H_1x64 + +from gem5.simulate.simulator import Simulator + +# For hooking up SST with this system. +from m5.objects import OutgoingRequestBridge + + +def generator_factory( + generator_class: str, rd_perc: int, mem_size: MemorySize +): + rd_perc = int(rd_perc) + if rd_perc > 100 or rd_perc < 0: + raise ValueError( + "Read percentage has to be an integer number between 0 and 100." + ) + if generator_class == "LinearGenerator": + return LinearGenerator( + duration="1ms", rate="32GiB/s", max_addr=mem_size, rd_perc=rd_perc + ) + elif generator_class == "RandomGenerator": + return RandomGenerator( + duration="1ms", rate="32GiB/s", max_addr=mem_size, rd_perc=rd_perc + ) + else: + raise ValueError(f"Unknown generator class {generator_class}") + + +parser = argparse.ArgumentParser( + description="A traffic generator that can be used to test a gem5 " + "memory component." +) + +parser.add_argument( + "--generator-class", + type=str, + help="The class of generator to use.", + choices=[ + "LinearGenerator", + "RandomGenerator", + ], + default="LinearGenerator", +) + +parser.add_argument( + "--memory-size", type=str, help="Memory size as a string", default="1GiB" +) + +parser.add_argument( + "--read-percentage", + type=int, + help="Percentage of read requests in the generated traffic.", + default=100, +) + + +args = parser.parse_args() + +# Single pair of HBM2 pseudo channels. 
This can be replaced with any +# single ported memory device +# memory = HighBandwidthMemory(HBM_2000_4H_1x64, 1, 128) +memory_size = args.memory_size +# sst_memory = OutgoingRequestBridge(physical_address_ranges = AddrRange(start = 0x0, size = memory_size)) + +# print("mem-size: ", str(sst_memory.physical_address_ranges[0])[2:]) + +generator = generator_factory( + args.generator_class, + args.read_percentage, + int(str(AddrRange(0x0, memory_size))[2:]), +) + +# We use the Test Board. This is a special board to run traffic generation +# tasks. Can replace the cache_hierarchy with any hierarchy to simulate the +# cache as well as the memory +board = TestBoardForSST( + clk_freq="1GHz", # Ignored for these generators + generator=generator, # We pass the traffic generator as the processor. + # memory=sst_memory, + remote_memory_size=memory_size, + memory=None, + # With no cache hierarchy the test board will directly connect the + # generator to the memory + cache_hierarchy=None, +) +board._pre_instantiate() +root = Root(full_system=True, system=board) +# simulator = Simulator(board=board) +# simulator.run() diff --git a/disaggregated_memory_setup/x86_dm_board.py b/disaggregated_memory_setup/x86_dm_board.py new file mode 100644 index 0000000000..db73096ba9 --- /dev/null +++ b/disaggregated_memory_setup/x86_dm_board.py @@ -0,0 +1,383 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# Creating an x86 board that can simulate more than 3 GB memory. 
+ +from m5.objects import ( + AddrRange, + VoltageDomain, + SrcClockDomain, + Terminal, + VncServer, + IOXBar, + BadAddr, + Port, + Pc, + AddrRange, + X86FsLinux, + Addr, + X86SMBiosBiosInformation, + X86IntelMPProcessor, + X86IntelMPIOAPIC, + X86IntelMPBus, + X86IntelMPBusHierarchy, + X86IntelMPIOIntAssignment, + X86E820Entry, + Bridge, + IOXBar, + IdeDisk, + CowDiskImage, + RawDiskImage, + BaseXBar, + Port, + OutgoingRequestBridge, +) + +import os +import m5 +from abc import ABCMeta +from gem5.components.boards.x86_board import X86Board +from gem5.components.boards.abstract_board import AbstractBoard +from gem5.components.processors.abstract_processor import AbstractProcessor +from gem5.components.memory.abstract_memory_system import AbstractMemorySystem +from gem5.components.cachehierarchies.abstract_cache_hierarchy import ( + AbstractCacheHierarchy, +) +from gem5.utils.override import overrides + +from typing import List, Sequence, Tuple + + +class X86DMBoard(X86Board): + __metaclass__ = ABCMeta + + def __init__( + self, + clk_freq: str, + processor: AbstractProcessor, + cache_hierarchy: AbstractCacheHierarchy, + memory: AbstractMemorySystem, + # remote_memory_str: str + # remote_memory: AbstractMemorySystem + remote_memory_size: str, + ) -> None: + self._localMemory = memory + self._remoteMemorySize = remote_memory_size + self._remoteMemory = OutgoingRequestBridge( + physical_address_ranges=AddrRange(0x40000000, 0x80000000) + ) + print(self._remoteMemory.physical_address_ranges[0]) + super().__init__( + clk_freq=clk_freq, + processor=processor, + cache_hierarchy=cache_hierarchy, + memory=memory, + ) + self.local_memory = memory + self.remote_memory = self._remoteMemory + + @overrides(X86Board) + def get_memory(self) -> "AbstractMemory": + """Get the memory (RAM) connected to the board. + + :returns: The memory system. + """ + raise NotImplementedError + + def get_local_memory(self) -> "AbstractMemory": + """Get the memory (RAM) connected to the board. 
+ :returns: The local memory system. + """ + return self._localMemory + + def get_remote_memory(self) -> "AbstractMemory": + """Get the memory (RAM) connected to the board. + :returns: The remote memory system. + """ + # raise Exception("cannot call this method") + return self._remoteMemory + + @overrides(X86Board) + def get_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]: + return self.get_local_memory().get_mem_ports() + + def get_remote_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]: + # return self.get_remote_memory().get_mem_ports() + return [ + ( + self.get_remote_memory().physical_address_ranges, + self.get_remote_memory().port, + ) + ] + + @overrides(X86Board) + def _setup_memory_ranges(self): + # Need to create 2 entries for the memory ranges + # local_memory = self.get_local_memory() + # remote_memory = self.get_local_memory() + + # local_mem_size = local_memory.get_size() + # remote_mem_size = remote_memory.get_size() + + self._local_mem_ranges = [ + "2GiB" + # AddrRange(local_mem_size) + ] + + # The remote memory starts where the local memory ends. Therefore it + # has to be offset by the local memory's size. + # self._remote_mem_ranges = [ + # AddrRange(start=0x100000000, size=remote_mem_size) + # # AddrRange(remote_mem_size) + # ] + # Keep it under 2 GB for this case. Each slice of memory is 1 GB. + + self.mem_ranges = [ + self._local_mem_ranges[0], + # self._remote_mem_ranges[0], + AddrRange(0xC0000000, size=0x100000), # For I/0 + ] + + @overrides(X86Board) + def get_default_kernel_args(self) -> List[str]: + return [ + "earlyprintk=ttyS0", + "console=ttyS0", + "lpj=7999923", + "root={root_value}", + "init=/bin/bash", + ] + + # @overrides(X86Board) + def _setup_io_devicess(self): + """Sets up the x86 IO devices. + + Note: This is mostly copy-paste from prior X86 FS setups. Some of it + may not be documented and there may be bugs. 
+ """ + + # Constants similar to x86_traits.hh + IO_address_space_base = 0x8000000000000000 + pci_config_address_space_base = 0xC000000000000000 + interrupts_address_space_base = 0xA000000000000000 + APIC_range_size = 1 << 12 + + # Setup memory system specific settings. + if self.get_cache_hierarchy().is_ruby(): + self.pc.attachIO(self.get_io_bus(), [self.pc.south_bridge.ide.dma]) + else: + self.bridge = Bridge(delay="50ns") + self.bridge.mem_side_port = self.get_io_bus().cpu_side_ports + self.bridge.cpu_side_port = ( + self.get_cache_hierarchy().get_mem_side_port() + ) + + # # Constants similar to x86_traits.hh + IO_address_space_base = 0x8000000000000000 + pci_config_address_space_base = 0xC000000000000000 + interrupts_address_space_base = 0xA000000000000000 + APIC_range_size = 1 << 12 + + self.bridge.ranges = [ + AddrRange(0xC0000000, 0xFFFF0000), + AddrRange( + IO_address_space_base, interrupts_address_space_base - 1 + ), + AddrRange(pci_config_address_space_base, Addr.max), + ] + + self.apicbridge = Bridge(delay="50ns") + self.apicbridge.cpu_side_port = self.get_io_bus().mem_side_ports + self.apicbridge.mem_side_port = ( + self.get_cache_hierarchy().get_cpu_side_port() + ) + self.apicbridge.ranges = [ + AddrRange( + interrupts_address_space_base, + interrupts_address_space_base + + self.get_processor().get_num_cores() * APIC_range_size + - 1, + ) + ] + self.pc.attachIO(self.get_io_bus()) + + # Add in a Bios information structure. 
+ self.workload.smbios_table.structures = [X86SMBiosBiosInformation()] + + # Set up the Intel MP table + base_entries = [] + ext_entries = [] + for i in range(self.get_processor().get_num_cores()): + bp = X86IntelMPProcessor( + local_apic_id=i, + local_apic_version=0x14, + enable=True, + bootstrap=(i == 0), + ) + base_entries.append(bp) + + io_apic = X86IntelMPIOAPIC( + id=self.get_processor().get_num_cores(), + version=0x11, + enable=True, + address=0xFEC00000, + ) + + self.pc.south_bridge.io_apic.apic_id = io_apic.id + base_entries.append(io_apic) + pci_bus = X86IntelMPBus(bus_id=0, bus_type="PCI ") + base_entries.append(pci_bus) + isa_bus = X86IntelMPBus(bus_id=1, bus_type="ISA ") + base_entries.append(isa_bus) + connect_busses = X86IntelMPBusHierarchy( + bus_id=1, subtractive_decode=True, parent_bus=0 + ) + ext_entries.append(connect_busses) + + pci_dev4_inta = X86IntelMPIOIntAssignment( + interrupt_type="INT", + polarity="ConformPolarity", + trigger="ConformTrigger", + source_bus_id=0, + source_bus_irq=0 + (4 << 2), + dest_io_apic_id=io_apic.id, + dest_io_apic_intin=16, + ) + + base_entries.append(pci_dev4_inta) + + def assignISAInt(irq, apicPin): + + assign_8259_to_apic = X86IntelMPIOIntAssignment( + interrupt_type="ExtInt", + polarity="ConformPolarity", + trigger="ConformTrigger", + source_bus_id=1, + source_bus_irq=irq, + dest_io_apic_id=io_apic.id, + dest_io_apic_intin=0, + ) + base_entries.append(assign_8259_to_apic) + + assign_to_apic = X86IntelMPIOIntAssignment( + interrupt_type="INT", + polarity="ConformPolarity", + trigger="ConformTrigger", + source_bus_id=1, + source_bus_irq=irq, + dest_io_apic_id=io_apic.id, + dest_io_apic_intin=apicPin, + ) + base_entries.append(assign_to_apic) + + assignISAInt(0, 2) + assignISAInt(1, 1) + + for i in range(3, 15): + assignISAInt(i, i) + + self.workload.intel_mp_table.base_entries = base_entries + self.workload.intel_mp_table.ext_entries = ext_entries + + entries = [ + # Mark the first megabyte of memory as reserved 
+ X86E820Entry(addr=0, size="639kB", range_type=1), + X86E820Entry(addr=0x9FC00, size="385kB", range_type=2), + # Mark the rest of physical memory as available + # the local address comes first. + X86E820Entry( + addr=0x100000, + size=f"{self.mem_ranges[0].size() - 0x100000:d}B", + range_type=1, + ), + # X86E820Entry( + # addr=0x100000000, + # size=f"{self.mem_ranges[1].size()}B", + # range_type=1, + # ), + ] + # print("____", self.mem_ranges[0].size() + 0x100000) + + # Reserve the last 64kB of the 32-bit address space for m5ops + entries.append( + X86E820Entry(addr=0xFFFF0000, size="64kB", range_type=2) + ) + + print(entries) + self.workload.e820_table.entries = entries + + @overrides(AbstractBoard) + def _connect_things(self) -> None: + """Connects all the components to the board. + + The order of this board is always: + + 1. Connect the memory. + 2. Connect the cache hierarchy. + 3. Connect the processor. + + Developers may build upon this assumption when creating components. + + Notes + ----- + + * The processor is incorporated after the cache hierarchy due to a bug + noted here: https://gem5.atlassian.net/browse/GEM5-1113. Until this + bug is fixed, this ordering must be maintained. + * Once this function is called `_connect_things_called` *must* be set + to `True`. + """ + + if self._connect_things_called: + raise Exception( + "The `_connect_things` function has already been called." + ) + + # Incorporate the memory into the motherboard. + self.get_local_memory().incorporate_memory(self) + print("_", self.get_local_memory().mem_ctrl) + self.get_remote_memory().port = ( + self.get_cache_hierarchy().membus.mem_side_ports + ) + # self.get_remote_memory().incorporate_memory(self) + + # Incorporate the cache hierarchy for the motherboard. + if self.get_cache_hierarchy(): + self.get_cache_hierarchy().incorporate_cache(self) + + # Incorporate the processor into the motherboard.
+ self.get_processor().incorporate_processor(self) + + self._connect_things_called = True + + @overrides(AbstractBoard) + def _post_instantiate(self): + """Called to set up anything needed after m5.instantiate""" + self.get_processor()._post_instantiate() + if self.get_cache_hierarchy(): + self.get_cache_hierarchy()._post_instantiate() + self.get_local_memory()._post_instantiate() + # self.get_remote_memory()._post_instantiate() diff --git a/ext/sst/gem5.cc b/ext/sst/gem5.cc index 6dc305f2ed..feec4900ec 100644 --- a/ext/sst/gem5.cc +++ b/ext/sst/gem5.cc @@ -205,11 +205,11 @@ gem5Component::init(unsigned phase) if (phase == 0) { initPython(args.size(), &args[0]); - const std::vector m5_instantiate_commands = { - "import m5", - "m5.instantiate()" - }; - execPythonCommands(m5_instantiate_commands); + // const std::vector m5_instantiate_commands = { + // "import m5", + // "m5.instantiate()" + // }; + // execPythonCommands(m5_instantiate_commands); // calling SimObject.startup() const std::vector simobject_setup_commands = { diff --git a/ext/sst/gem5.hh b/ext/sst/gem5.hh index 172c2c8e76..0a4d3c0f88 100644 --- a/ext/sst/gem5.hh +++ b/ext/sst/gem5.hh @@ -143,11 +143,11 @@ class gem5Component: public SST::Component {"cmd", "command to run gem5's config"} ) - SST_ELI_DOCUMENT_SUBCOMPONENT_SLOTS( - // These are the generally expected ports. - {"system_port", "Connection to gem5 system_port", "gem5.gem5Bridge"}, - {"cache_port", "Connection to gem5 CPU", "gem5.gem5Bridge"} - ) + // SST_ELI_DOCUMENT_SUBCOMPONENT_SLOTS( + // // These are the generally expected ports. 
+ // {"system_port", "Connection to gem5 system_port", "gem5.gem5Bridge"}, + // {"cache_port", "Connection to gem5 CPU", "gem5.gem5Bridge"} + // ) }; diff --git a/ext/sst/sst/example_board.py b/ext/sst/sst/example_board.py new file mode 100644 index 0000000000..8439f39ae2 --- /dev/null +++ b/ext/sst/sst/example_board.py @@ -0,0 +1,144 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import sst +import sys +import os + +from sst import UnitAlgebra + +cache_link_latency = "1ps" + +bbl = "riscv-boot-exit-nodisk" +cpu_clock_rate = "3GHz" +# gem5 will send requests to physical addresses of range [0x80000000, inf) to +# memory currently, we do not subtract 0x80000000 from the request's address to +# get the "real" address so, the mem_size would always be 2GiB larger than the +# desired memory size + +# memory_size_gem5 = "2GiB" +memory_size_sst = "6GiB" +addr_range_end = UnitAlgebra(memory_size_sst).getRoundedValue() + +l1_params = { + "access_latency_cycles" : "1", + "cache_frequency" : cpu_clock_rate, + "replacement_policy" : "lru", + "coherence_protocol" : "MESI", + "associativity" : "4", + "cache_line_size" : "64", + "cache_size" : "32MiB", + "L1" : "1", +} + +# We keep a track of all the memory ports that we have. +sst_ports = { + # "system_port" : "system.system_outgoing_bridge", + # "cache_port" : "system.memory_outgoing_bridge" + "remote_memory_port" : "system.remote_memory" +} + +# We need a list of ports. 
+port_list = [] +for port in sst_ports: + port_list.append(port) + +cpu_params = { + "frequency": cpu_clock_rate, + "cmd": " ../../disaggregated_memory_setup/numa_config_sst.py", + "debug_flags": "VIO", + "ports" : " ".join(port_list) +} + +gem5_node = sst.Component("gem5_node", "gem5.gem5Component") +gem5_node.addParams(cpu_params) + +# cache_bus = sst.Component("cache_bus", "memHierarchy.Bus") +# cache_bus.addParams( { "bus_frequency" : cpu_clock_rate } ) + +# for initialization +# system_port = gem5_node.setSubComponent(port_list[0], "gem5.gem5Bridge", 0) +# tell the SubComponent the name of the corresponding SimObject +# system_port.addParams({ "response_receiver_name": sst_ports["system_port"]}) + +# SST -> gem5 +# cache_port = gem5_node.setSubComponent(port_list[1], "gem5.gem5Bridge", 0) +# cache_port.addParams({ "response_receiver_name": sst_ports["cache_port"]}) + +# L1 cache +# l1_cache = sst.Component("l1_cache", "memHierarchy.Cache") +# l1_cache.addParams(l1_params) + +remote_memory_port = gem5_node.setSubComponent("remote_memory_port", + "gem5.gem5Bridge", 0) +remote_memory_port.addParams({ + "response_receiver_name" : sst_ports["remote_memory_port"] +}) + +# Memory +memctrl = sst.Component("memory", "memHierarchy.MemController") +# `addr_range_end` should be changed accordingly to memory_size_sst +memctrl.addParams({ + "debug" : "0", + "clock" : "2.4GHz", + "request_width" : "64", + "addr_range_end" : addr_range_end, +}) +memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") +memory.addParams({ + "access_time" : "30ns", + "mem_size" : memory_size_sst +}) + +# Connections +# cpu <-> L1 +# cpu_cache_link = sst.Link("cpu_l1_cache_link") +# cpu_cache_link.connect( +# (cache_port, "port", cache_link_latency), +# (cache_bus, "high_network_0", cache_link_latency) +# ) +# system_cache_link = sst.Link("system_cache_link") +# system_cache_link.connect( +# (system_port, "port", cache_link_latency), +# (cache_bus, "high_network_1", 
cache_link_latency) +# ) +# cache_bus_cache_link = sst.Link("cache_bus_cache_link") +# cache_bus_cache_link.connect( +# (cache_bus, "low_network_0", cache_link_latency), +# (l1_cache, "high_network_0", cache_link_latency) +# ) +# L1 <-> mem +cache_mem_link = sst.Link("l1_cache_mem_link") +cache_mem_link.connect( + (remote_memory_port, "port", cache_link_latency), + # (l1_cache, "low_network_0", cache_link_latency), + (memctrl, "direct_link", cache_link_latency) +) + +# enable Statistics +stat_params = { "rate" : "0ns" } +sst.setStatisticLoadLevel(5) +sst.setStatisticOutput("sst.statOutputTXT", {"filepath" : "./sst-stats.txt"}) diff --git a/ext/sst/sst/example_board_x86.py b/ext/sst/sst/example_board_x86.py new file mode 100644 index 0000000000..5d7d3464a4 --- /dev/null +++ b/ext/sst/sst/example_board_x86.py @@ -0,0 +1,144 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import sst +import sys +import os + +from sst import UnitAlgebra + +cache_link_latency = "1ps" + +bbl = "riscv-boot-exit-nodisk" +cpu_clock_rate = "3GHz" +# gem5 will send requests to physical addresses of range [0x80000000, inf) to +# memory currently, we do not subtract 0x80000000 from the request's address to +# get the "real" address so, the mem_size would always be 2GiB larger than the +# desired memory size + +# memory_size_gem5 = "2GiB" +memory_size_sst = "6GiB" +addr_range_end = UnitAlgebra(memory_size_sst).getRoundedValue() + +l1_params = { + "access_latency_cycles" : "1", + "cache_frequency" : cpu_clock_rate, + "replacement_policy" : "lru", + "coherence_protocol" : "MESI", + "associativity" : "4", + "cache_line_size" : "64", + "cache_size" : "32MiB", + "L1" : "1", +} + +# We keep a track of all the memory ports that we have. +sst_ports = { + # "system_port" : "system.system_outgoing_bridge", + # "cache_port" : "system.memory_outgoing_bridge" + "remote_memory_port" : "system.remote_memory" +} + +# We need a list of ports. 
+port_list = [] +for port in sst_ports: + port_list.append(port) + +cpu_params = { + "frequency": cpu_clock_rate, + "cmd": " ../../disaggregated_memory_setup/numa_config_x86.py", + "debug_flags": "", + "ports" : " ".join(port_list) +} + +gem5_node = sst.Component("gem5_node", "gem5.gem5Component") +gem5_node.addParams(cpu_params) + +# cache_bus = sst.Component("cache_bus", "memHierarchy.Bus") +# cache_bus.addParams( { "bus_frequency" : cpu_clock_rate } ) + +# for initialization +# system_port = gem5_node.setSubComponent(port_list[0], "gem5.gem5Bridge", 0) +# tell the SubComponent the name of the corresponding SimObject +# system_port.addParams({ "response_receiver_name": sst_ports["system_port"]}) + +# SST -> gem5 +# cache_port = gem5_node.setSubComponent(port_list[1], "gem5.gem5Bridge", 0) +# cache_port.addParams({ "response_receiver_name": sst_ports["cache_port"]}) + +# L1 cache +# l1_cache = sst.Component("l1_cache", "memHierarchy.Cache") +# l1_cache.addParams(l1_params) + +remote_memory_port = gem5_node.setSubComponent("remote_memory_port", + "gem5.gem5Bridge", 0) +remote_memory_port.addParams({ + "response_receiver_name" : sst_ports["remote_memory_port"] +}) + +# Memory +memctrl = sst.Component("memory", "memHierarchy.MemController") +# `addr_range_end` should be changed accordingly to memory_size_sst +memctrl.addParams({ + "debug" : "0", + "clock" : "2.4GHz", + "request_width" : "64", + "addr_range_end" : addr_range_end, +}) +memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") +memory.addParams({ + "access_time" : "30ns", + "mem_size" : memory_size_sst +}) + +# Connections +# cpu <-> L1 +# cpu_cache_link = sst.Link("cpu_l1_cache_link") +# cpu_cache_link.connect( +# (cache_port, "port", cache_link_latency), +# (cache_bus, "high_network_0", cache_link_latency) +# ) +# system_cache_link = sst.Link("system_cache_link") +# system_cache_link.connect( +# (system_port, "port", cache_link_latency), +# (cache_bus, "high_network_1", cache_link_latency) 
+# ) +# cache_bus_cache_link = sst.Link("cache_bus_cache_link") +# cache_bus_cache_link.connect( +# (cache_bus, "low_network_0", cache_link_latency), +# (l1_cache, "high_network_0", cache_link_latency) +# ) +# L1 <-> mem +cache_mem_link = sst.Link("l1_cache_mem_link") +cache_mem_link.connect( + (remote_memory_port, "port", cache_link_latency), + # (l1_cache, "low_network_0", cache_link_latency), + (memctrl, "direct_link", cache_link_latency) +) + +# enable Statistics +stat_params = { "rate" : "0ns" } +sst.setStatisticLoadLevel(5) +sst.setStatisticOutput("sst.statOutputTXT", {"filepath" : "./sst-stats.txt"}) diff --git a/ext/sst/sst/example_freq.py b/ext/sst/sst/example_freq.py new file mode 100644 index 0000000000..7886e196b4 --- /dev/null +++ b/ext/sst/sst/example_freq.py @@ -0,0 +1,139 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import sst +import sys +import os + +from sst import UnitAlgebra + +cache_link_latency = "0ps" + +bbl = "riscv-boot-exit-nodisk" +cpu_clock_rate = "1GHz" +# gem5 will send requests to physical addresses of range [0x80000000, inf) to +# memory currently, we do not subtract 0x80000000 from the request's address to +# get the "real" address so, the mem_size would always be 2GiB larger than the +# desired memory size +memory_size_gem5 = "2GiB" +memory_size_sst = "4GiB" +addr_range_end = UnitAlgebra(memory_size_sst).getRoundedValue() + +l1_params = { + "access_latency_cycles" : "1", + "cache_frequency" : cpu_clock_rate, + "replacement_policy" : "lru", + "coherence_protocol" : "MESI", + "associativity" : "4", + "cache_line_size" : "64", + "cache_size" : "2 MiB", + "L1" : "1", +} + +# We keep a track of all the memory ports that we have. +sst_ports = { + # "system_port" : "system.system_outgoing_bridge", + "cache_port" : "system.memory_outgoing_bridge" +} + +# We need a list of ports. 
+port_list = [] +for port in sst_ports: + port_list.append(port) + +cpu_params = { + "frequency": cpu_clock_rate, + "cmd": " --outdir=10MHz ../../configs/example/sst/riscv_fs.py" + + f" --cpu-clock-rate {cpu_clock_rate}" + + f" --memory-size {memory_size_gem5}", + "debug_flags": "VIO", + "ports" : " ".join(port_list) +} + +gem5_node = sst.Component("gem5_node", "gem5.gem5Component") +gem5_node.addParams(cpu_params) + +cache_bus = sst.Component("cache_bus", "memHierarchy.Bus") +cache_bus.addParams( { "bus_frequency" : cpu_clock_rate } ) + +# for initialization +# system_port = gem5_node.setSubComponent(port_list[0], "gem5.gem5Bridge", 0) +# tell the SubComponent the name of the corresponding SimObject +# system_port.addParams({ "response_receiver_name": sst_ports["system_port"]}) + +# SST -> gem5 +cache_port = gem5_node.setSubComponent(port_list[0], "gem5.gem5Bridge", 0) +cache_port.addParams({ "response_receiver_name": sst_ports["cache_port"]}) + +# L1 cache +# l1_cache = sst.Component("l1_cache", "memHierarchy.Cache") +# l1_cache.addParams(l1_params) + +# Memory +memctrl = sst.Component("memory", "memHierarchy.MemController") +# `addr_range_end` should be changed accordingly to memory_size_sst +memctrl.addParams({ + "debug" : "0", + "clock" : "1GHz", + "request_width" : "64", + "addr_range_end" : addr_range_end, +}) +memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") +memory.addParams({ + "access_time" : "48.75ns", + "mem_size" : memory_size_sst +}) + +# Connections +# cpu <-> L1 +cpu_cache_link = sst.Link("cpu_l1_cache_link") +cpu_cache_link.connect( + (cache_port, "port", cache_link_latency), + (cache_bus, "high_network_0", cache_link_latency) +) +# system_cache_link = sst.Link("system_cache_link") +# system_cache_link.connect( +# (system_port, "port", cache_link_latency), +# (cache_bus, "high_network_1", cache_link_latency) +# ) +# cache_bus_cache_link = sst.Link("cache_bus_cache_link") +# cache_bus_cache_link.connect( +# (cache_bus, 
"low_network_0", cache_link_latency), +# (l1_cache, "high_network_0", cache_link_latency) +# ) +# L1 <-> mem +cache_mem_link = sst.Link("l1_cache_mem_link") +cache_mem_link.connect( + (cache_bus, "low_network_0", cache_link_latency), + (memctrl, "direct_link", cache_link_latency) +) + +# enable Statistics +stat_params = { "rate" : "0ns" } +sst.setStatisticLoadLevel(5) +sst.setStatisticOutput("sst.statOutputTXT", {"filepath" : "./sst-stats-10Mhz.txt"}) +# sst.enableAllStatisticsForComponentName("l1_cache", stat_params) +sst.enableAllStatisticsForComponentName("memory", stat_params) diff --git a/ext/sst/sst/example_nodes.py b/ext/sst/sst/example_nodes.py new file mode 100644 index 0000000000..2f14c4db93 --- /dev/null +++ b/ext/sst/sst/example_nodes.py @@ -0,0 +1,221 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This SST configuration file tests a merlin router. +import sst +import sys +import os + +from sst import UnitAlgebra + +cache_link_latency = "1ns" + +bbl = "riscv-boot-exit-nodisk" +cpu_clock_rate = "3GHz" +# gem5 will send requests to physical addresses of range [0x80000000, inf) to memory +# currently, we do not subtract 0x80000000 from the request's address to get the "real" address +# so, the mem_size would always be 2GiB larger than the desired memory size +# memory_size_gem5 = "2GiB" +# memory_size_sst = "4GiB" +# addr_range_end = UnitAlgebra(memory_size_sst).getRoundedValue() + +l1_params = { + "access_latency_cycles" : "1", + "cache_frequency" : cpu_clock_rate, + "replacement_policy" : "lru", + "coherence_protocol" : "MESI", + "associativity" : "4", + "cache_line_size" : "64", + "cache_size" : "32 MiB", + "L1" : "1", +} + +dirNicParams = { + "network_bw" : "25GB/s", + "group" : 1, +} + +def create_cache(name, params = None): + cache = sst.Component(name, "memHierarchy.Cache") + if params is None: + cache.addParams(l1_params) + else: + cache.addParams(params) + return cache + +def connect_components(link_name: str, + low_port_name: str, low_port_idx: int, + high_port_name: str, high_port_idx: int, + port = False, direct_link = False): + link = sst.Link(link_name) + low_port = "low_network_" + str(low_port_idx) + if port == True: + low_port = "port" + high_port = "high_network_" + str(high_port_idx) + if direct_link == True: + 
high_port = "direct_link" + link.connect( + (low_port_name, low_port, cache_link_latency), + (high_port_name, high_port, cache_link_latency) + ) + +# Define the number of gem5 nodes in the system. +system_nodes = 2 + +# Define the total number of SST Memory nodes +memory_nodes = 1 + +# This example uses fixed number of node size -> 2 GiB +# TODO: Fix this in the later version of the script. +# The directory controller decides where the addresses are mapped to. +node_memory_size = "2GiB" +remote_memory_slice = "2GiB" + +# SST memory node size. Each system gets a 2 GiB slice of fixed memory. +sst_memory_size = str((memory_nodes * int(node_memory_size[0])) + 2) + "GiB" +print(sst_memory_size) + +# Add all the Gem5 nodes to this list. +gem5_nodes = [] +cache_buses = [] +directory_caches = [] +comp_dirctrls = [] +memory_ports = [] + +# Create each of these nodes and connect it to a SST memory cache +for node in range(system_nodes): + # Each of the nodes needs to have the initial parameters. We might need + # to supply the instance count to the Gem5 side. This will enable range + # adjustments to be made to the DTB File. + cmd = [ + "--outdir=m5out_{}".format(node), + "../../configs/example/sst/riscv_fs_node.py", + "--cpu-clock-rate {}".format(cpu_clock_rate), + "--memory-size {}".format(node_memory_size), + # "--local-memory-size {}".format(node_memory_size), + # "--remote-memory-size {}".format(remote_memory_slice), + "--instance {}".format(node) + ] + ports = { + "remote_memory_port" : "system.memory_outgoing_bridge" + } + port_list = [] + for port in ports: + port_list.append(port) + cpu_params = { + "frequency" : cpu_clock_rate, + "cmd" : " ".join(cmd), + "debug_flags" : "Plic,Clint", + "ports" : " ".join(port_list) + } + # Each of the Gem5 node has to be separately simulated. TODO: Figure out + # this part on the mpirun side.
+ gem5_nodes.append( + sst.Component("gem5_node_{}".format(node), "gem5.gem5Component") + ) + gem5_nodes[node].addParams(cpu_params) + # We need a separate cache bus for each of the nodes + cache_buses.append( + sst.Component("cache_bus_for_node_{}".format(node), "memHierarchy.Bus") + ) + cache_buses[node].addParams({"bus_frequency" : cpu_clock_rate}) + # TODO: This needs to be updated + memory_ports.append( + gem5_nodes[node].setSubComponent( + "remote_memory_port", "gem5.gem5Bridge", 0 + ) + ) + memory_ports[node].addParams({ + "response_receiver_name" : "system.memory_outgoing_bridge" + }) + directory_caches.append(create_cache("dir_cache_{}".format(node))) + directory_caches[node].addParams({"network_address" : "2" }) + # Connect the basic components. + connect_components("node_{}_mem_port_2_bus".format(node), + memory_ports[node], 0, + cache_buses[node], 0, + port = True) + connect_components("node_{}_bus_2_dir_cache".format(node), + cache_buses[node], 0, + directory_caches[node], 0) + # Create directory controllers that dictate the memory ranges for each of + # the remote memory nodes. + comp_dirctrls.append(sst.Component( + "dirctrl_for_node_{}".format(node), + "memHierarchy.DirectoryController") + ) + addr_range_start = 0x80000000 + node * 0x80000000 + addr_range_end = 0x80000000 + (node + 1) * 0x80000000 + comp_dirctrls[node].addParams({ + "coherence_protocol" : "MESI", + "network_address" : "1", + "entry_cache_size" : "16384", + "network_bw" : "25GB/s", + "addr_range_start" : addr_range_start, # 2 * (1024 ** 3), # starts at 0x80000000 + "addr_range_end" : addr_range_end # 2 * (1024 ** 3) + 2048 * (1024 ** 2) # ends at 0x100000000 (4GiB) + }) +# All system nodes are setup. Now create a SST memory. Keep it simplemem for +# avoiding extra simulation time. There is only one memory node in SST's side.
+# This will be updated in the future to use number of sst_memory_nodes +memory = sst.Component("memory", "memHierarchy.MemController") +memory.addParams({ + "request_width" : 64, + "coherence_protocol" : "MESI", + "access_time" : "33 ns", + "backend.mem_size" : sst_memory_size, + "clock" : "2.4GHz", + "debug" : "0", + "range_start" : 2 * (1024 ** 3), # it's behind a directory controller and it starts at 0x80000000 + }) +comp_chiprtr = sst.Component("chiprtr", "merlin.hr_router") +comp_chiprtr.setSubComponent("topology","merlin.singlerouter") +comp_chiprtr.addParams({ + "xbar_bw" : "128GB/s", + "link_bw" : "128GB/s", + "input_buf_size" : "1KB", + "num_ports" : str(system_nodes * 2), + "flit_size" : "72B", + "output_buf_size" : "1KB", + "id" : "0", + "topology" : "merlin.singlerouter" +}) +mem_bus = sst.Component("membus", "memHierarchy.Bus") +# Finally connect all the nodes together in the net +for node in range(system_nodes): + sst.Link("link_cache_net_node_{}".format(node)).connect( + (directory_caches[node], "directory", "10ns"), + (comp_chiprtr, "port" + str(node * 2 + 1), "2ns")) + sst.Link("link_dir_net_nodes_{}".format(node)).connect( + (comp_chiprtr, "port" + str(node * 2), "2ns"), + (comp_dirctrls[node], "network", "2ns")) + sst.Link("link_dir_mem_link_node_{}".format(node)).connect( + (comp_dirctrls[node], "memory", "10ns"), + (memory, "direct_link", "10ns")) +# enable Statistics +stat_params = { "rate" : "0ns" } +sst.setStatisticLoadLevel(10) +sst.setStatisticOutput("sst.statOutputTXT", {"filepath" : "./sst-router-example.txt"}) +sst.enableAllStatisticsForAllComponents() diff --git a/ext/sst/sst/example_nodes_multi_ISA.py b/ext/sst/sst/example_nodes_multi_ISA.py new file mode 100644 index 0000000000..46e04a9751 --- /dev/null +++ b/ext/sst/sst/example_nodes_multi_ISA.py @@ -0,0 +1,234 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This SST configuration file tests a merlin router. 
+import sst +import sys +import os + +from sst import UnitAlgebra + +cache_link_latency = "1ns" + +bbl = "riscv-boot-exit-nodisk" +cpu_clock_rate = "3GHz" +# gem5 will send requests to physical addresses of range [0x80000000, inf) to memory +# currently, we do not subtract 0x80000000 from the request's address to get the "real" address +# so, the mem_size would always be 2GiB larger than the desired memory size +# memory_size_gem5 = "2GiB" +# memory_size_sst = "4GiB" +# addr_range_end = UnitAlgebra(memory_size_sst).getRoundedValue() + +l1_params = { + "access_latency_cycles" : "1", + "cache_frequency" : cpu_clock_rate, + "replacement_policy" : "lru", + "coherence_protocol" : "MESI", + "associativity" : "4", + "cache_line_size" : "64", + "cache_size" : "32 MiB", + "L1" : "1", +} + +dirNicParams = { + "network_bw" : "25GB/s", + "group" : 1, +} + +def create_cache(name, params = None): + cache = sst.Component(name, "memHierarchy.Cache") + if params is None: + cache.addParams(l1_params) + else: + cache.addParams(params) + return cache + +def connect_components(link_name: str, + low_port_name: str, low_port_idx: int, + high_port_name: str, high_port_idx: int, + port = False, direct_link = False): + link = sst.Link(link_name) + low_port = "low_network_" + str(low_port_idx) + if port == True: + low_port = "port" + high_port = "high_network_" + str(high_port_idx) + if direct_link == True: + high_port = "direct_link" + link.connect( + (low_port_name, low_port, cache_link_latency), + (high_port_name, high_port, cache_link_latency) + ) + +# =========================================================================== # + +# Define the number of gem5 nodes in the system. +system_nodes = 2 + +# Define the total number of SST Memory nodes +memory_nodes = 1 + +# This example uses fixed number of node size -> 2 GiB +# TODO: Fix this in the later version of the script. +# The directory controller decides where the addresses are mapped to. 
+node_memory_slice = "2GiB" +remote_memory_slice = "2GiB" + +# SST memory node size. Each system gets a 2 GiB slice of fixed memory. +sst_memory_size = str((memory_nodes * int(node_memory_slice[0])) + (system_nodes) * 2 + 2) + "GiB" +addr_range_end = UnitAlgebra(sst_memory_size).getRoundedValue() +print(sst_memory_size) + +# There is one cache bus connecting all gem5 ports to the remote memory. +mem_bus = sst.Component("membus", "memHierarchy.Bus") +mem_bus.addParams( { "bus_frequency" : cpu_clock_rate } ) + +memctrl = sst.Component("memory", "memHierarchy.MemController") +memctrl.setRank(0, 0) +# `addr_range_end` must be kept in sync with `sst_memory_size` +memctrl.addParams({ + "debug" : "0", + "clock" : "2.4GHz", + "request_width" : "64", + "addr_range_end" : addr_range_end, +}) +memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") +memory.addParams({ + "access_time" : "30ns", + "mem_size" : sst_memory_size +}) + +# Add all the Gem5 nodes to this list. +gem5_nodes = [] +memory_ports = [] + +# Create each of these nodes and connect it to a SST memory cache +for node in range(system_nodes): + # Each of the nodes needs to have the initial parameters. We might need + # to supply the instance count to the Gem5 side. This will enable range + # adjustments to be made to the DTB File.
+ cmd = [] + ports = {} + if node % 2 == 1: + # This is a RISCV node + cmd = [ + f"--outdir=m5out_riscv_node_{node}", + "../../disaggregated_memory_setup/numa_config_sst_nodes.py", + f"--cpu-clock-rate {cpu_clock_rate}", + f"--instance {node}" + + # "--outdir=m5out_{}".format(node), + # "../../configs/example/sst/riscv_fs_node.py", + # "--cpu-clock-rate {}".format(cpu_clock_rate), + # "--memory-size {}".format(node_memory_slice), + # # "--local-memory-size {}".format(node_memory_slice), + # # "--remote-memory-size {}".format(remote_memory_slice), + # "--instance {}".format(node) + ] + ports = { + "remote_memory_port" : "system.remote_memory" + } + # Each of the Gem5 node has to be separately simulated. TODO: Figure out + # this part on the mpirun side. + gem5_nodes.append( + sst.Component("gem5_node_{}".format(node), "gem5.gem5Component") + ) + memory_ports.append( + gem5_nodes[node].setSubComponent( + "remote_memory_port", "gem5.gem5Bridge", 0 + ) + ) + else: + cmd = [ + f"--outdir=m5out_arm_node_{node}", + "../../disaggregated_memory_setup/numa_config_sst_nodes_arm.py", + f"--cpu-clock-rate {cpu_clock_rate}", + f"--cpu-type timing", + f"--local-memory-range 2GiB", + f"--remote-memory-range 4294967296,6442450944", + # f"--instance {node}" + ] + ports = { + "remote_memory_port" : "system.remote_memory_outgoing_bridge" + } + # Each of the Gem5 node has to be separately simulated. TODO: Figure out + # this part on the mpirun side. 
+ gem5_nodes.append( + sst.Component("gem5_node_{}".format(node), "gem5.gem5Component") + ) + memory_ports.append( + gem5_nodes[node].setSubComponent( + "remote_memory_port", "gem5.gem5Bridge", 0 + ) + ) + # This is an ARM core + + port_list = [] + for port in ports: + port_list.append(port) + cpu_params = { + "frequency" : cpu_clock_rate, + "cmd" : " ".join(cmd), + "debug_flags" : "Plic,Clint,VIO", + "ports" : " ".join(port_list) + } + + gem5_nodes[node].addParams(cpu_params) + gem5_nodes[node].setRank(node + 1, 0) + + memory_ports[node].addParams({ + "response_receiver_name" : ports["remote_memory_port"] + }) + + # we don't need directory controllers in this example case. The start and + # end ranges do not really matter as the OS is doing this management in + # this case. + connect_components(f"node_{node}_mem_port_2_mem_bus", + memory_ports[node], 0, + mem_bus, node, + port = True) + + # directory_caches.append(create_cache("dir_cache_{}".format(node))) + # directory_caches[node].addParams({"network_address" : "2" }) + # Connect the basic components. + # connect_components("node_{}_mem_port_2_bus".format(node), + # memory_ports[node], 0, + # cache_buses[node], node, + # port = True) + +# All system nodes are setup. Now create a SST memory. Keep it simplemem for +# avoiding extra simulation time. There is only one memory node in SST's side.
+# This will be updated in the future to use number of sst_memory_nodes + +connect_components("membus_2_memory", + mem_bus, 0, + memctrl, 0, + direct_link = True) + +# enable Statistics +stat_params = { "rate" : "0ns" } +sst.setStatisticLoadLevel(10) +sst.setStatisticOutput("sst.statOutputTXT", {"filepath" : "./sst-boards-example.txt"}) +sst.enableAllStatisticsForAllComponents() diff --git a/ext/sst/sst/example_nodes_w_board.py b/ext/sst/sst/example_nodes_w_board.py new file mode 100644 index 0000000000..40eaae9656 --- /dev/null +++ b/ext/sst/sst/example_nodes_w_board.py @@ -0,0 +1,206 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This SST configuration file tests a merlin router. +import sst +import sys +import os + +from sst import UnitAlgebra + +cache_link_latency = "1ns" + +bbl = "riscv-boot-exit-nodisk" +cpu_clock_rate = "3GHz" +# gem5 will send requests to physical addresses of range [0x80000000, inf) to memory +# currently, we do not subtract 0x80000000 from the request's address to get the "real" address +# so, the mem_size would always be 2GiB larger than the desired memory size +# memory_size_gem5 = "2GiB" +# memory_size_sst = "4GiB" +# addr_range_end = UnitAlgebra(memory_size_sst).getRoundedValue() + +l1_params = { + "access_latency_cycles" : "1", + "cache_frequency" : cpu_clock_rate, + "replacement_policy" : "lru", + "coherence_protocol" : "MESI", + "associativity" : "4", + "cache_line_size" : "64", + "cache_size" : "32 MiB", + "L1" : "1", +} + +dirNicParams = { + "network_bw" : "25GB/s", + "group" : 1, +} + +def create_cache(name, params = None): + cache = sst.Component(name, "memHierarchy.Cache") + if params is None: + cache.addParams(l1_params) + else: + cache.addParams(params) + return cache + +def connect_components(link_name: str, + low_port_name: str, low_port_idx: int, + high_port_name: str, high_port_idx: int, + port = False, direct_link = False): + link = sst.Link(link_name) + low_port = "low_network_" + str(low_port_idx) + if port == True: + low_port = "port" + high_port = "high_network_" + str(high_port_idx) + if direct_link == True: + 
high_port = "direct_link" + link.connect( + (low_port_name, low_port, cache_link_latency), + (high_port_name, high_port, cache_link_latency) + ) + +# =========================================================================== # + +# Define the number of gem5 nodes in the system. +system_nodes = 4 + +# Define the total number of SST Memory nodes +memory_nodes = 1 + +# This example uses fixed number of node size -> 2 GiB +# TODO: Fix this in the later version of the script. +# The directory controller decides where the addresses are mapped to. +node_memory_slice = "2GiB" +remote_memory_slice = "2GiB" + +# SST memory node size. Each system gets a 2 GiB slice of fixed memory. +sst_memory_size = str((memory_nodes * int(node_memory_slice[0])) + (system_nodes) * 2 + 2) + "GiB" +addr_range_end = UnitAlgebra(sst_memory_size).getRoundedValue() +print(sst_memory_size) + +# There is one cache bus connecting all gem5 ports to the remote memory. +mem_bus = sst.Component("membus", "memHierarchy.Bus") +mem_bus.addParams( { "bus_frequency" : cpu_clock_rate } ) + +memctrl = sst.Component("memory", "memHierarchy.MemController") +memctrl.setRank(0, 0) +# `addr_range_end` should be changed accordingly to memory_size_sst +memctrl.addParams({ + "debug" : "0", + "clock" : "2.4GHz", + "request_width" : "64", + "addr_range_end" : addr_range_end, +}) +memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") +memory.addParams({ + "access_time" : "50ns", + "mem_size" : sst_memory_size +}) + +# Add all the Gem5 nodes to this list. +gem5_nodes = [] +memory_ports = [] + +# Create each of these nodes and connect it to a SST memory cache +for node in range(system_nodes): + # Each of the nodes needs to have the initial parameters. We might need to + # supply the instance count to the Gem5 side. This will enable range + # adjustments to be made to the DTB File.
+ cmd = [ + f"--outdir=m5out_node_ARM_O3_{node}", + # f"--outdir=switch_{node}", + # "../../disaggregated_memory_setup/numa_config_sst_nodes.py", + "../../disaggregated_memory_setup/numa_config_sst_nodes_arm.py", + f"--cpu-clock-rate {cpu_clock_rate}", + f"--instance {node}" + + # "--outdir=m5out_{}".format(node), + # "../../configs/example/sst/riscv_fs_node.py", + # "--cpu-clock-rate {}".format(cpu_clock_rate), + # "--memory-size {}".format(node_memory_slice), + # # "--local-memory-size {}".format(node_memory_slice), + # # "--remote-memory-size {}".format(remote_memory_slice), + # "--instance {}".format(node) + ] + ports = { + "remote_memory_port" : "system.remote_memory" + } + port_list = [] + for port in ports: + port_list.append(port) + cpu_params = { + "frequency" : cpu_clock_rate, + "cmd" : " ".join(cmd), + "debug_flags" : "", + "ports" : " ".join(port_list) + } + # Each of the Gem5 nodes has to be separately simulated. TODO: Figure out + # this part on the mpirun side. + gem5_nodes.append( + sst.Component("gem5_node_{}".format(node), "gem5.gem5Component") + ) + gem5_nodes[node].addParams(cpu_params) + gem5_nodes[node].setRank(node + 1, 0) + + memory_ports.append( + gem5_nodes[node].setSubComponent( + "remote_memory_port", "gem5.gem5Bridge", 0 + ) + ) + memory_ports[node].addParams({ + "response_receiver_name" : ports["remote_memory_port"] + }) + + # we don't need directory controllers in this example case. The start and + # end ranges do not really matter as the OS is doing this management in + # this case. + connect_components(f"node_{node}_mem_port_2_mem_bus", + memory_ports[node], 0, + mem_bus, node, + port = True) + + # directory_caches.append(create_cache("dir_cache_{}".format(node))) + # directory_caches[node].addParams({"network_address" : "2" }) + # Connect the basic components. + # connect_components("node_{}_mem_port_2_bus".format(node), + # memory_ports[node], 0, + # cache_buses[node], node, + # port = True) + +# All system nodes are setup.
Now create a SST memory. Keep it simplemem for +# avoiding extra simulation time. There is only one memory node in SST's side. +# This will be updated in the future to use number of sst_memory_nodes + +connect_components("membus_2_memory", + mem_bus, 0, + memctrl, 0, + direct_link = True) + +# enable Statistics +stat_params = { "rate" : "0ns" } +sst.setStatisticLoadLevel(10) +sst.setStatisticOutput("sst.statOutputTXT", {"filepath" : "./sst-boards-example-O3.txt"}) +sst.enableAllStatisticsForAllComponents() diff --git a/ext/sst/sst/example_nodes_w_board_arm.py b/ext/sst/sst/example_nodes_w_board_arm.py new file mode 100644 index 0000000000..0096a190d1 --- /dev/null +++ b/ext/sst/sst/example_nodes_w_board_arm.py @@ -0,0 +1,201 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This SST configuration file tests a merlin router. +import sst +import sys +import os + +from sst import UnitAlgebra + +cache_link_latency = "1ns" + +bbl = "riscv-boot-exit-nodisk" +cpu_clock_rate = "3GHz" +# gem5 will send requests to physical addresses of range [0x80000000, inf) to memory +# currently, we do not subtract 0x80000000 from the request's address to get the "real" address +# so, the mem_size would always be 2GiB larger than the desired memory size +# memory_size_gem5 = "2GiB" +# memory_size_sst = "4GiB" +# addr_range_end = UnitAlgebra(memory_size_sst).getRoundedValue() + +l1_params = { + "access_latency_cycles" : "1", + "cache_frequency" : cpu_clock_rate, + "replacement_policy" : "lru", + "coherence_protocol" : "MESI", + "associativity" : "4", + "cache_line_size" : "64", + "cache_size" : "32 MiB", + "L1" : "1", +} + +dirNicParams = { + "network_bw" : "25GB/s", + "group" : 1, +} + +def create_cache(name, params = None): + cache = sst.Component(name, "memHierarchy.Cache") + if params is None: + cache.addParams(l1_params) + else: + cache.addParams(params) + return cache + +def connect_components(link_name: str, + low_port_name: str, low_port_idx: int, + high_port_name: str, high_port_idx: int, + port = False, direct_link = False): + link = sst.Link(link_name) + low_port = "low_network_" + str(low_port_idx) + if port == True: + low_port = "port" + high_port = "high_network_" + str(high_port_idx) + if direct_link == True: + 
high_port = "direct_link" + link.connect( + (low_port_name, low_port, cache_link_latency), + (high_port_name, high_port, cache_link_latency) + ) + +# =========================================================================== # + +# Define the number of gem5 nodes in the system. +system_nodes = 2 + +# Define the total number of SST Memory nodes +memory_nodes = 1 + +# This example uses fixed number of node size -> 2 GiB +# TODO: Fix this in the later version of the script. +# The directory controller decides where the addresses are mapped to. +node_memory_slice = "2GiB" +remote_memory_slice = "2GiB" + +# SST memory node size. Each system gets a 2 GiB slice of fixed memory. +sst_memory_size = str((memory_nodes * int(node_memory_slice[0])) + (system_nodes) * 2 + 2) + "GiB" +addr_range_end = UnitAlgebra(sst_memory_size).getRoundedValue() +print(sst_memory_size) + +# There is one cache bus connecting all gem5 ports to the remote memory. +mem_bus = sst.Component("membus", "memHierarchy.Bus") +mem_bus.addParams( { "bus_frequency" : cpu_clock_rate } ) + +memctrl = sst.Component("memory", "memHierarchy.MemController") +memctrl.setRank(0, 0) +# `addr_range_end` should be changed accordingly to memory_size_sst +memctrl.addParams({ + "debug" : "0", + "clock" : "2.4GHz", + "request_width" : "64", + "addr_range_end" : addr_range_end, +}) +memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") +memory.addParams({ + "access_time" : "50ns", + "mem_size" : sst_memory_size +}) + +# Add all the Gem5 nodes to this list. +gem5_nodes = [] +memory_ports = [] + +# Create each of these nodes and connect it to a SST memory cache +for node in range(system_nodes): + # Each of the nodes needs to have the initial parameters. We might need to + # supply the instance count to the Gem5 side. This will enable range + # adjustments to be made to the DTB File.
+ node_range = [0x80000000 + (node + 1) * 0x80000000, 0x80000000 + (node + 2) * 0x80000000] + print(node_range) + cmd = [ + f"--outdir=m5out_arm_node_{node}", + "../../disaggregated_memory_setup/numa_config_sst_nodes_arm.py", + f"--cpu-clock-rate {cpu_clock_rate}", + f"--cpu-type o3", + f"--local-memory-range 2GiB", + f"--remote-memory-range {node_range[0]},{node_range[1]}", + # f"--instance {node}" + ] + ports = { + "remote_memory_port" : "system.remote_memory" + } + port_list = [] + for port in ports: + port_list.append(port) + cpu_params = { + "frequency" : cpu_clock_rate, + "cmd" : " ".join(cmd), + "debug_flags" : "", + "ports" : " ".join(port_list) + } + # Each of the Gem5 nodes has to be separately simulated. TODO: Figure out + # this part on the mpirun side. + gem5_nodes.append( + sst.Component("gem5_node_{}".format(node), "gem5.gem5Component") + ) + gem5_nodes[node].addParams(cpu_params) + gem5_nodes[node].setRank(node + 1, 0) + + memory_ports.append( + gem5_nodes[node].setSubComponent( + "remote_memory_port", "gem5.gem5Bridge", 0 + ) + ) + memory_ports[node].addParams({ + "response_receiver_name" : ports["remote_memory_port"] + }) + + # we don't need directory controllers in this example case. The start and + # end ranges do not really matter as the OS is doing this management in + # this case. + connect_components(f"node_{node}_mem_port_2_mem_bus", + memory_ports[node], 0, + mem_bus, node, + port = True) + + # directory_caches.append(create_cache("dir_cache_{}".format(node))) + # directory_caches[node].addParams({"network_address" : "2" }) + # Connect the basic components. + # connect_components("node_{}_mem_port_2_bus".format(node), + # memory_ports[node], 0, + # cache_buses[node], node, + # port = True) + +# All system nodes are setup. Now create a SST memory. Keep it simplemem for +# avoiding extra simulation time. There is only one memory node in SST's side.
+# This will be updated in the future to use number of sst_memory_nodes + +connect_components("membus_2_memory", + mem_bus, 0, + memctrl, 0, + direct_link = True) + +# enable Statistics +stat_params = { "rate" : "0ns" } +sst.setStatisticLoadLevel(10) +sst.setStatisticOutput("sst.statOutputTXT", {"filepath" : "./sst-boards-arm-example-O3.txt"}) +sst.enableAllStatisticsForAllComponents() diff --git a/ext/sst/sst/example_nodes_w_board_full_mem.py b/ext/sst/sst/example_nodes_w_board_full_mem.py new file mode 100644 index 0000000000..00e492f8c2 --- /dev/null +++ b/ext/sst/sst/example_nodes_w_board_full_mem.py @@ -0,0 +1,204 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This SST configuration file tests a merlin router. +import sst +import sys +import os + +from sst import UnitAlgebra + +cache_link_latency = "1ns" + +bbl = "riscv-boot-exit-nodisk" +cpu_clock_rate = "3GHz" +# gem5 will send requests to physical addresses of range [0x80000000, inf) to memory +# currently, we do not subtract 0x80000000 from the request's address to get the "real" address +# so, the mem_size would always be 2GiB larger than the desired memory size +# memory_size_gem5 = "2GiB" +# memory_size_sst = "4GiB" +# addr_range_end = UnitAlgebra(memory_size_sst).getRoundedValue() + +l1_params = { + "access_latency_cycles" : "1", + "cache_frequency" : cpu_clock_rate, + "replacement_policy" : "lru", + "coherence_protocol" : "MESI", + "associativity" : "4", + "cache_line_size" : "64", + "cache_size" : "32 MiB", + "L1" : "1", +} + +dirNicParams = { + "network_bw" : "25GB/s", + "group" : 1, +} + +def create_cache(name, params = None): + cache = sst.Component(name, "memHierarchy.Cache") + if params is None: + cache.addParams(l1_params) + else: + cache.addParams(params) + return cache + +def connect_components(link_name: str, + low_port_name: str, low_port_idx: int, + high_port_name: str, high_port_idx: int, + port = False, direct_link = False): + link = sst.Link(link_name) + low_port = "low_network_" + str(low_port_idx) + if port == True: + low_port = "port" + high_port = "high_network_" + str(high_port_idx) + if direct_link == True: + 
high_port = "direct_link" + link.connect( + (low_port_name, low_port, cache_link_latency), + (high_port_name, high_port, cache_link_latency) + ) + +# =========================================================================== # + +# Define the number of gem5 nodes in the system. +system_nodes = 2 + +# Define the total number of SST Memory nodes +memory_nodes = 1 + +# This example uses fixed number of node size -> 2 GiB +# TODO: Fix this in the later version of the script. +# The directory controller decides where the addresses are mapped to. +node_memory_slice = "2GiB" +remote_memory_slice = "2GiB" + +# SST memory node size. Each system gets a 2 GiB slice of fixed memory. +sst_memory_size = str((memory_nodes * int(node_memory_slice[0])) + (system_nodes) * 2 + 2) + "GiB" +addr_range_end = UnitAlgebra(sst_memory_size).getRoundedValue() +print(sst_memory_size) + +# There is one cache bus connecting all gem5 ports to the remote memory. +mem_bus = sst.Component("membus", "memHierarchy.Bus") +mem_bus.addParams( { "bus_frequency" : cpu_clock_rate } ) + +memctrl = sst.Component("memory", "memHierarchy.MemController") +memctrl.setRank(0, 0) +# `addr_range_end` should be changed accordingly to memory_size_sst +memctrl.addParams({ + "debug" : "0", + "clock" : "2.4GHz", + "request_width" : "64", + "addr_range_end" : addr_range_end, +}) +memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") +memory.addParams({ + "access_time" : "50ns", + "mem_size" : sst_memory_size +}) + +# Add all the Gem5 nodes to this list. +gem5_nodes = [] +memory_ports = [] + +# Create each of these nodes and connect it to a SST memory cache +for node in range(system_nodes): + # Each of the nodes needs to have the initial parameters. We might need to + # supply the instance count to the Gem5 side. This will enable range + # adjustments to be made to the DTB File.
+ cmd = [ + f"--outdir=m5out_node_O3x_{node}", + "../../disaggregated_memory_setup/numa_config_sst_nodes.py", + f"--cpu-clock-rate {cpu_clock_rate}", + f"--instance {node}" + + # "--outdir=m5out_{}".format(node), + # "../../configs/example/sst/riscv_fs_node.py", + # "--cpu-clock-rate {}".format(cpu_clock_rate), + # "--memory-size {}".format(node_memory_slice), + # # "--local-memory-size {}".format(node_memory_slice), + # # "--remote-memory-size {}".format(remote_memory_slice), + # "--instance {}".format(node) + ] + ports = { + "remote_memory_port" : "system.remote_memory" + } + port_list = [] + for port in ports: + port_list.append(port) + cpu_params = { + "frequency" : cpu_clock_rate, + "cmd" : " ".join(cmd), + "debug_flags" : "", + "ports" : " ".join(port_list) + } + # Each of the Gem5 nodes has to be separately simulated. TODO: Figure out + # this part on the mpirun side. + gem5_nodes.append( + sst.Component("gem5_node_{}".format(node), "gem5.gem5Component") + ) + gem5_nodes[node].addParams(cpu_params) + gem5_nodes[node].setRank(node + 1, 0) + + memory_ports.append( + gem5_nodes[node].setSubComponent( + "remote_memory_port", "gem5.gem5Bridge", 0 + ) + ) + memory_ports[node].addParams({ + "response_receiver_name" : ports["remote_memory_port"] + }) + + # we don't need directory controllers in this example case. The start and + # end ranges do not really matter as the OS is doing this management in + # this case. + connect_components(f"node_{node}_mem_port_2_mem_bus", + memory_ports[node], 0, + mem_bus, node, + port = True) + + # directory_caches.append(create_cache("dir_cache_{}".format(node))) + # directory_caches[node].addParams({"network_address" : "2" }) + # Connect the basic components. + # connect_components("node_{}_mem_port_2_bus".format(node), + # memory_ports[node], 0, + # cache_buses[node], node, + # port = True) + +# All system nodes are setup. Now create a SST memory. Keep it simplemem for +# avoiding extra simulation time.
There is only one memory node in SST's side. +# This will be updated in the future to use number of sst_memory_nodes + +connect_components("membus_2_memory", + mem_bus, 0, + memctrl, 0, + direct_link = True) + +# enable Statistics +stat_params = { "rate" : "0ns" } +sst.setStatisticLoadLevel(10) +sst.setStatisticOutput("sst.statOutputTXT", {"filepath" : "./sst-boards-example.txt"}) +sst.enableAllStatisticsForAllComponents() diff --git a/ext/sst/sst/example_traffic_gen.py b/ext/sst/sst/example_traffic_gen.py new file mode 100644 index 0000000000..0ad3e10a3a --- /dev/null +++ b/ext/sst/sst/example_traffic_gen.py @@ -0,0 +1,218 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This SST configuration file tests a merlin router. +import sst +import sys +import os + +from sst import UnitAlgebra + +cache_link_latency = "1ns" + +bbl = "riscv-boot-exit-nodisk" +cpu_clock_rate = "1GHz" +# gem5 will send requests to physical addresses of range [0x80000000, inf) to memory +# currently, we do not subtract 0x80000000 from the request's address to get the "real" address +# so, the mem_size would always be 2GiB larger than the desired memory size +# memory_size_gem5 = "2GiB" +# memory_size_sst = "4GiB" +# addr_range_end = UnitAlgebra(memory_size_sst).getRoundedValue() + +l1_params = { + "access_latency_cycles" : "1", + "cache_frequency" : cpu_clock_rate, + "replacement_policy" : "lru", + "coherence_protocol" : "MESI", + "associativity" : "4", + "cache_line_size" : "64", + "cache_size" : "32 MiB", + "L1" : "1", +} + +dirNicParams = { + "network_bw" : "25GB/s", + "group" : 1, +} + +def create_cache(name, params = None): + cache = sst.Component(name, "memHierarchy.Cache") + if params is None: + cache.addParams(l1_params) + else: + cache.addParams(params) + return cache + +def connect_components(link_name: str, + low_port_name: str, low_port_idx: int, + high_port_name: str, high_port_idx: int, + port = False, direct_link = False): + link = sst.Link(link_name) + low_port = "low_network_" + str(low_port_idx) + if port == True: + low_port = "port" + high_port = "high_network_" + str(high_port_idx) + if direct_link == True: + 
high_port = "direct_link" + link.connect( + (low_port_name, low_port, cache_link_latency), + (high_port_name, high_port, cache_link_latency) + ) + +# =========================================================================== # + +# Define the number of gem5 nodes in the system. +system_nodes = 1 + +# Define the total number of SST Memory nodes +memory_nodes = 1 + +# This example uses fixed number of node size -> 2 GiB +# TODO: Fix this in the later version of the script. +# The directory controller decides where the addresses are mapped to. +node_memory_slice = "2GiB" +remote_memory_slice = "2GiB" + +# SST memory node size. Each system gets a 2 GiB slice of fixed memory. +sst_memory_size = str((memory_nodes * int(node_memory_slice[0])) + (system_nodes) * 2 + 2) + "GiB" +addr_range_end = UnitAlgebra(sst_memory_size).getRoundedValue() +print(sst_memory_size) + +# There is one cache bus connecting all gem5 ports to the remote memory. +mem_bus = sst.Component("membus", "memHierarchy.Bus") +mem_bus.addParams( { "bus_frequency" : cpu_clock_rate } ) + +memctrl = sst.Component("memory", "memHierarchy.MemController") +memctrl.setRank(0, 0) +# `addr_range_end` should be changed accordingly to memory_size_sst +memctrl.addParams({ + "debug" : "1", + "clock" : "1GHz", + "request_width" : "64", + "verbose" : 2, + "debug_level" : 10, + "backing" : "none", + "addr_range_end" : addr_range_end, +}) +# memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") +# memory.addParams({ +# "access_time" : "50ns", +# "mem_size" : sst_memory_size +# }) + +memory = memctrl.setSubComponent( "backend", "memHierarchy.timingDRAM") +memory.addParams({ + "id" : 0, + "addrMapper" : "memHierarchy.sandyBridgeAddrMapper", + "addrMapper.interleave_size" : "64B", + "addrMapper.row_size" : "1KiB", + "clock" : "2.4GHz", + "mem_size" : sst_memory_size, + "channels" : 1, + "channel.numRanks" : 2, + "channel.rank.numBanks" : 16, + "channel.transaction_Q_size" : 64, + "channel.rank.bank.CL" : 14, 
+ "channel.rank.bank.CL_WR" : 12, + "channel.rank.bank.RCD" : 14, + "channel.rank.bank.TRP" : 14, + "channel.rank.bank.dataCycles" : 2, + "channel.rank.bank.pagePolicy" : "memHierarchy.timeoutPagePolicy", + "channel.rank.bank.transactionQ" : "memHierarchy.reorderTransactionQ", + "channel.rank.bank.pagePolicy.timeoutCycles" : 50, + "printconfig" : 0, + "channel.printconfig" : 0, + "channel.rank.printconfig" : 0, + "channel.rank.bank.printconfig" : 0, +}) + + +# Add all the Gem5 nodes to this list. +gem5_nodes = [] +memory_ports = [] + +# Create each of these nodes and connect it to a SST memory cache +for node in range(system_nodes): + # Each of the nodes needs to have the initial parameters. We might need to + # supply the instance count to the Gem5 side. This will enable range + # adjustments to be made to the DTB File. + cmd = [ + f"--outdir=traffic_gen_{node}", + "../../configs/example/sst/traffic_gen.py", + f"--cpu-clock-rate {cpu_clock_rate}", + "--memory-size 1GiB" + ] + ports = { + "remote_memory_port" : "system.memory_outgoing_bridge" + } + port_list = [] + for port in ports: + port_list.append(port) + cpu_params = { + "frequency" : cpu_clock_rate, + "cmd" : " ".join(cmd), + "debug_flags" : "", # TrafficGen", + "ports" : " ".join(port_list) + } + # Each of the Gem5 nodes has to be separately simulated. TODO: Figure out + # this part on the mpirun side. + gem5_nodes.append( + sst.Component("gem5_node_{}".format(node), "gem5.gem5Component") + ) + gem5_nodes[node].addParams(cpu_params) + gem5_nodes[node].setRank(node + 1, 0) + + memory_ports.append( + gem5_nodes[node].setSubComponent( + "remote_memory_port", "gem5.gem5Bridge", 0 + ) + ) + memory_ports[node].addParams({ + "response_receiver_name" : ports["remote_memory_port"] + }) + + # we don't need directory controllers in this example case. The start and + # end ranges do not really matter as the OS is doing this management in + # this case.
+ connect_components(f"node_{node}_mem_port_2_mem_bus", + memory_ports[node], 0, + mem_bus, node, + port = True) + +# All system nodes are setup. Now create a SST memory. Keep it simplemem for +# avoiding extra simulation time. There is only one memory node in SST's side. +# This will be updated in the future to use number of sst_memory_nodes + +connect_components("membus_2_memory", + mem_bus, 0, + memctrl, 0, + direct_link = True) + +# enable Statistics +stat_params = { "rate" : "0ns" } +sst.setStatisticLoadLevel(10) +sst.setStatisticOutput("sst.statOutputTXT", {"filepath" : "./sst-traffic-example.txt"}) +sst.enableAllStatisticsForAllComponents() diff --git a/ext/sst/sst_responder.cc b/ext/sst/sst_responder.cc index 14c64abb52..b97910d566 100644 --- a/ext/sst/sst_responder.cc +++ b/ext/sst/sst_responder.cc @@ -65,4 +65,6 @@ void SSTResponder::handleRecvFunctional(gem5::PacketPtr pkt) { owner->handleRecvFunctional(pkt); + // std::cout << "SSTResponder" << std::endl; + // make these timing? 
} diff --git a/ext/sst/sst_responder_subcomponent.cc b/ext/sst/sst_responder_subcomponent.cc index e7ac78673f..35db9dc101 100644 --- a/ext/sst/sst_responder_subcomponent.cc +++ b/ext/sst/sst_responder_subcomponent.cc @@ -108,6 +108,9 @@ SSTResponderSubComponent::init(unsigned phase) addr, data.size(), data); memoryInterface->sendUntimedData(request); } + responseReceiver->initData.clear(); + responseReceiver->initPhaseComplete(true); + std::cout << "Init phase is complete " << responseReceiver->getInitPhaseStatus() << std::endl; } memoryInterface->init(phase); } @@ -205,6 +208,25 @@ SSTResponderSubComponent::portEventHandler( dynamic_cast(request)) { return; } + else if (SST::Interfaces::StandardMem::ReadResp* test = + dynamic_cast( + request)) { + // functional calls this + // gem5::RequestPtr req = std::make_shared( + // dynamic_cast( + // request)->pAddr, + // dynamic_cast( + // request)->size, 0, 0); + + // gem5::PacketPtr pkt = new gem5::Packet( + // req, gem5::MemCmd::ReadResp); + + // // Clear out bus delay notifications + // pkt->headerDelay = pkt->payloadDelay = 0; + // if (!(responseReceiver->sendTimingResp(pkt))) + // responseQueue.push(pkt); + return; + } else if (SST::Interfaces::StandardMem::WriteResp* test = dynamic_cast( request)) { @@ -241,6 +263,44 @@ SSTResponderSubComponent::handleRecvRespRetry() void SSTResponderSubComponent::handleRecvFunctional(gem5::PacketPtr pkt) { + // SST does not understand what is a functional access in gem5. Since it + // has all the stored in it's memory, any functional access made to SST has + // to be correctly handled. All functional access *must be* writes. + std::cout << "handleRecvFunc was called! Need to do something here!" << std::endl; + // basically this data has to be present + gem5::Addr addr = pkt->getAddr(); + uint8_t* ptr = pkt->getPtr(); + uint64_t size = pkt->getSize(); + + // Create a new request to handle this request immediately. 
+ SST::Interfaces::StandardMem::Request* request = nullptr; + + // we need a minimal translator here which does reads and writes. + switch((gem5::MemCmd::Command)pkt->cmd.toInt()) { + case gem5::MemCmd::WriteReq: { + std::vector data(ptr, ptr+size); + request = new SST::Interfaces::StandardMem::Write( + addr, data.size(), data); + break; + } + case gem5::MemCmd::ReadReq: { + request = new SST::Interfaces::StandardMem::Read(addr, size); + break; + } + default: + panic("handleRecvFunctional: Unable to convert gem5 packet: %s\n", pkt->cmd.toString()); + } + if(pkt->req->isUncacheable()) { + request->setFlag( + SST::Interfaces::StandardMem::Request::Flag::F_NONCACHEABLE); + } + memoryInterface->send(request); + // memoryInterface->sendUntimedData(request); + + // memoryInterface->init(phase); + // sst does not understand what is a functional request. queue this as a + // regular memory packet. + } bool diff --git a/ext/sst/translator.hh b/ext/sst/translator.hh index 236abddcd7..040c146f37 100644 --- a/ext/sst/translator.hh +++ b/ext/sst/translator.hh @@ -157,7 +157,9 @@ gem5RequestToSSTRequest(gem5::PacketPtr pkt, if (pkt->needsResponse()) sst_request_id_to_packet_map[request->getID()] = pkt; - + + // if(gem5::curTick() > 340330000000) + // std::cout << request->getString() << std::endl; return request; } @@ -165,6 +167,8 @@ inline void inplaceSSTRequestToGem5PacketPtr(gem5::PacketPtr pkt, SST::Interfaces::StandardMem::Request* request) { + // if(gem5::curTick() > 340330000000) + // std::cout << request->getString() << std::endl; pkt->makeResponse(); // Resolve the success of Store Conditionals diff --git a/src/sst/outgoing_request_bridge.cc b/src/sst/outgoing_request_bridge.cc index 58abfbad46..12e1f01bcc 100644 --- a/src/sst/outgoing_request_bridge.cc +++ b/src/sst/outgoing_request_bridge.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2021 The Regents of the University of California +// Copyright (c) 2021-2023 The Regents of the University of California // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -43,6 +43,7 @@ OutgoingRequestBridge::OutgoingRequestBridge( physicalAddressRanges(params.physical_address_ranges.begin(), params.physical_address_ranges.end()) { + this->init_phase_bool = false; } OutgoingRequestBridge::~OutgoingRequestBridge() @@ -84,6 +85,7 @@ OutgoingRequestBridge::getAddrRanges() const std::vector>> OutgoingRequestBridge::getInitData() const { + std::cout << "getInitData() called!" << std::endl; return initData; } @@ -105,19 +107,48 @@ OutgoingRequestBridge::sendTimingSnoopReq(gem5::PacketPtr pkt) outgoingPort.sendTimingSnoopReq(pkt); } +void +OutgoingRequestBridge::initPhaseComplete(bool value) { + init_phase_bool = value; +} +bool +OutgoingRequestBridge::getInitPhaseStatus() { + return init_phase_bool; +} void OutgoingRequestBridge::handleRecvFunctional(PacketPtr pkt) { - uint8_t* ptr = pkt->getPtr(); - uint64_t size = pkt->getSize(); - std::vector data(ptr, ptr+size); - initData.push_back(std::make_pair(pkt->getAddr(), data)); + // This should not receive any functional accesses + gem5::MemCmd::Command pktCmd = (gem5::MemCmd::Command)pkt->cmd.toInt(); + // std::cout << "Recv Functional : 0x" << std::hex << pkt->getAddr() << + // std::dec << " " << pktCmd << " " << gem5::MemCmd::WriteReq << " " << + // getInitPhaseStatus() << std::endl; + // Check at which stage are we at. If we are at INIT phase, then queue all + // these packets. + if (!getInitPhaseStatus()) + { + // sstResponder->recvAtomic(pkt); + uint8_t* ptr = pkt->getPtr(); + uint64_t size = pkt->getSize(); + std::vector data(ptr, ptr+size); + initData.push_back(std::make_pair(pkt->getAddr(), data)); + } + // This is the RUN phase. + else { + // These packets have to translated at runtime. We convert these + // packets to timing as its data has to be stored correctly in SST + // memory. + // + // Ensure that these packets are write requests. 
+ sstResponder->handleRecvFunctional(pkt); + } } Tick OutgoingRequestBridge:: OutgoingRequestPort::recvAtomic(PacketPtr pkt) { + // return 0; assert(false && "OutgoingRequestPort::recvAtomic not implemented"); return Tick(); } diff --git a/src/sst/outgoing_request_bridge.hh b/src/sst/outgoing_request_bridge.hh index af049af45a..0a3e2e2821 100644 --- a/src/sst/outgoing_request_bridge.hh +++ b/src/sst/outgoing_request_bridge.hh @@ -69,6 +69,18 @@ class OutgoingRequestBridge: public SimObject AddrRangeList getAddrRanges() const; }; + // We need a boolean variable to distinguish between INIT and RUN phases in + // SST. Gem5 does functional accesses to the SST memory when: + // (a) It loads the kernel (at the start of the simulation) + // (b) During VIO/disk accesses. + // While loading the kernel, it is easy to handle all functional accesses + // as SST allows initializing of untimed data during its INIT phase. + // However, functional accesses done to the SST memory during RUN phase have + // to be handled separately. In this implementation, we convert all such + // functional accesses to timing accesses so that it is correctly read from + // the memory. + bool init_phase_bool; + public: // a gem5 ResponsePort OutgoingRequestPort outgoingPort; @@ -97,8 +109,18 @@ class OutgoingRequestBridge: public SimObject // Returns the buffered data for initialization. This is necessary as // when gem5 sends functional requests to memory for initialization, // the connection in SST Memory Hierarchy has not been constructed yet. + // This buffer is only used during the INIT phase. std::vector>> getInitData() const; + // We need Set/Get functions to set the init_phase_bool. + // `initPhaseComplete` is used to signal the outgoing bridge that INIT + // phase is completed and RUN phase will start. + void initPhaseComplete(bool value); + + // We read the value of the init_phase_bool using `getInitPhaseStatus` + // method.
+ + bool getInitPhaseStatus(); // gem5 Component (from SST) will call this function to let set the // bridge's corresponding SSTResponderSubComponent (which implemented // SSTResponderInterface). I.e., this will connect this bridge to the From ddfbb2e46603e2e998f67fb5bbafcfaae610d1e3 Mon Sep 17 00:00:00 2001 From: Kaustav Goswami Date: Mon, 30 Oct 2023 19:49:41 -0700 Subject: [PATCH 03/23] ext: added functional accesses to gem5/SST bridge. This change adds functional accesses to the gem5/SST bridge to boot disk images using the bridge. The change also adds more information in INSTALL.md for MacOS users on how to get the bridge setup working. Signed-off-by: Kaustav Goswami --- ext/sst/INSTALL.md | 7 +++++++ ext/sst/Makefile | 21 --------------------- ext/sst/gem5.hh | 1 - ext/sst/sst_responder.cc | 2 -- ext/sst/sst_responder_subcomponent.cc | 16 +++++++++++----- src/sst/outgoing_request_bridge.cc | 14 ++++++++------ 6 files changed, 26 insertions(+), 35 deletions(-) delete mode 100644 ext/sst/Makefile diff --git a/ext/sst/INSTALL.md b/ext/sst/INSTALL.md index d7b9b6fd87..376a4132f8 100644 --- a/ext/sst/INSTALL.md +++ b/ext/sst/INSTALL.md @@ -84,6 +84,13 @@ According to the OS that you're using, you need to rename the `Makefile.xxx` to cp Makefile.xxx Makefile # linux or mac make -j4 ``` +If you are compiling this on Mac, then you'd need to export `DYLD_LIBRARY_PATH` +```sh +# go to the base gem5 directory +cd ../.. +export DYLD_LIBRARY_PATH=:`pwd`/build/RISCV/ +``` + Change `ARCH=RISCV` to `ARCH=ARM` in the `Makefile` in case you're compiling for ARM. 
### Running an example simulation diff --git a/ext/sst/Makefile b/ext/sst/Makefile deleted file mode 100644 index 9213d266e9..0000000000 --- a/ext/sst/Makefile +++ /dev/null @@ -1,21 +0,0 @@ -SST_VERSION=SST-11.1.0 # Name of the .pc file in lib/pkgconfig where SST is installed -GEM5_LIB=gem5_opt -ARCH=RISCV -OFLAG=3 - -LDFLAGS=-shared -fno-common ${shell pkg-config ${SST_VERSION} --libs} -L../../build/${ARCH}/ -Wl,-rpath ../../build/${ARCH} -CXXFLAGS=-std=c++17 -g -O${OFLAG} -fPIC ${shell pkg-config ${SST_VERSION} --cflags} ${shell python3-config --includes} -I../../build/${ARCH}/ -I../../ext/pybind11/include/ -I../../build/softfloat/ -I../../ext -CPPFLAGS+=-MMD -MP -SRC=$(wildcard *.cc) - -.PHONY: clean all - -all: libgem5.so - -libgem5.so: $(SRC:%.cc=%.o) - ${CXX} ${CPPFLAGS} ${LDFLAGS} $? -o $@ -l${GEM5_LIB} - --include $(SRC:%.cc=%.d) - -clean: - ${RM} *.[do] libgem5.so diff --git a/ext/sst/gem5.hh b/ext/sst/gem5.hh index 0a4d3c0f88..135b963f99 100644 --- a/ext/sst/gem5.hh +++ b/ext/sst/gem5.hh @@ -79,7 +79,6 @@ #include #include -#include #include #include diff --git a/ext/sst/sst_responder.cc b/ext/sst/sst_responder.cc index b97910d566..14c64abb52 100644 --- a/ext/sst/sst_responder.cc +++ b/ext/sst/sst_responder.cc @@ -65,6 +65,4 @@ void SSTResponder::handleRecvFunctional(gem5::PacketPtr pkt) { owner->handleRecvFunctional(pkt); - // std::cout << "SSTResponder" << std::endl; - // make these timing? } diff --git a/ext/sst/sst_responder_subcomponent.cc b/ext/sst/sst_responder_subcomponent.cc index 35db9dc101..ead4c69dd6 100644 --- a/ext/sst/sst_responder_subcomponent.cc +++ b/ext/sst/sst_responder_subcomponent.cc @@ -263,11 +263,13 @@ SSTResponderSubComponent::handleRecvRespRetry() void SSTResponderSubComponent::handleRecvFunctional(gem5::PacketPtr pkt) { - // SST does not understand what is a functional access in gem5. Since it + // SST does not understand what is a functional access in gem5 since SST + // only allows functional accesses at init time. 
Since it // has all the stored in it's memory, any functional access made to SST has - // to be correctly handled. All functional access *must be* writes. + // to be correctly handled. The idea here is to convert this functional access + // into a timing access and keep the SST memory consistent. std::cout << "handleRecvFunc was called! Need to do something here!" << std::endl; - // basically this data has to be present + gem5::Addr addr = pkt->getAddr(); uint8_t* ptr = pkt->getPtr(); uint64_t size = pkt->getSize(); @@ -275,7 +277,8 @@ SSTResponderSubComponent::handleRecvFunctional(gem5::PacketPtr pkt) // Create a new request to handle this request immediately. SST::Interfaces::StandardMem::Request* request = nullptr; - // we need a minimal translator here which does reads and writes. + // we need a minimal translator here which does reads and writes. Any other + // command type is unexpected and the program should crash immediately. switch((gem5::MemCmd::Command)pkt->cmd.toInt()) { case gem5::MemCmd::WriteReq: { std::vector data(ptr, ptr+size); @@ -288,7 +291,10 @@ SSTResponderSubComponent::handleRecvFunctional(gem5::PacketPtr pkt) break; } default: - panic("handleRecvFunctional: Unable to convert gem5 packet: %s\n", pkt->cmd.toString()); + panic( + "handleRecvFunctional: Unable to convert gem5 packet: %s\n", + pkt->cmd.toString() + ); } if(pkt->req->isUncacheable()) { request->setFlag( diff --git a/src/sst/outgoing_request_bridge.cc b/src/sst/outgoing_request_bridge.cc index 12e1f01bcc..74426d778c 100644 --- a/src/sst/outgoing_request_bridge.cc +++ b/src/sst/outgoing_request_bridge.cc @@ -85,7 +85,6 @@ OutgoingRequestBridge::getAddrRanges() const std::vector>> OutgoingRequestBridge::getInitData() const { - std::cout << "getInitData() called!"
<< std::endl; return initData; } @@ -119,7 +118,7 @@ void OutgoingRequestBridge::handleRecvFunctional(PacketPtr pkt) { // This should not receive any functional accesses - gem5::MemCmd::Command pktCmd = (gem5::MemCmd::Command)pkt->cmd.toInt(); + // gem5::MemCmd::Command pktCmd = (gem5::MemCmd::Command)pkt->cmd.toInt(); // std::cout << "Recv Functional : 0x" << std::hex << pkt->getAddr() << // std::dec << " " << pktCmd << " " << gem5::MemCmd::WriteReq << " " << // getInitPhaseStatus() << std::endl; @@ -133,13 +132,16 @@ OutgoingRequestBridge::handleRecvFunctional(PacketPtr pkt) std::vector data(ptr, ptr+size); initData.push_back(std::make_pair(pkt->getAddr(), data)); } - // This is the RUN phase. + // This is the RUN phase. SST does not allow any sendUntimedData (AKA + // functional accesses) to its memory. We need to convert these accesses + // to timing to at least store the correct data in the memory. else { // These packets have to translated at runtime. We convert these // packets to timing as its data has to be stored correctly in SST - // memory. - // - // Ensure that these packets are write requests. + // memory. Otherwise reads from the SST memory will fail. To reproduce + // this error, do not handle any functional accesses and the kernel + // boot will fail while reading the correct partition from the vda + // device. sstResponder->handleRecvFunctional(pkt); } } From 309a73fde7fd92b5a4422e015ea701e67d63cf60 Mon Sep 17 00:00:00 2001 From: Kaustav Goswami Date: Wed, 4 Oct 2023 17:11:05 -0700 Subject: [PATCH 04/23] ext: Updated the gem5 SST Bridge to use SST 13.0.0 This change updates the gem5 SST Bridge to use SST 13.0.0. Changes are made to replace SimpleMem class to StandardMem class as SimpleMem will be deprecated in SST 14 and above. In addition, the translator.hh is updated to translate more types of gem5 packets.
A new parameter `ports` was added on SST's side when invoking the gem5 component which does not require recompiling the gem5 component whenever a new outgoing bridge is added in a gem5 config. Change-Id: I45f0013bc35d088df0aa5a71951422cabab4d7f7 Signed-off-by: Kaustav Goswami --- ext/sst/INSTALL.md | 28 ++++++++++++++++++++-------- ext/sst/sst/arm_example.py | 12 ++++++++++++ 2 files changed, 32 insertions(+), 8 deletions(-) diff --git a/ext/sst/INSTALL.md b/ext/sst/INSTALL.md index ba61996b32..63340bef51 100644 --- a/ext/sst/INSTALL.md +++ b/ext/sst/INSTALL.md @@ -1,5 +1,8 @@ # Installing SST +The links to download SST source code are available at +. +This guide is using the most recent SST version (13.0.0) as of September 2023. The links to download SST source code are available at . This guide is using the most recent SST version (13.0.0) as of September 2023. @@ -12,13 +15,14 @@ installed. ```sh wget https://github.com/sstsimulator/sst-core/releases/download/v13.0.0_Final/sstcore-13.0.0.tar.gz -tar xzf sstcore-13.0.0.tar.gz +tar xvf sstcore-13.0.0.tar.gz ``` ### Installing SST-Core ```sh cd sstcore-13.0.0 +cd sstcore-13.0.0 ./configure --prefix=$SST_CORE_HOME --with-python=/usr/bin/python3-config \ --disable-mpi # optional, used when MPI is not available. make all -j$(nproc) @@ -37,13 +41,14 @@ export PATH=$SST_CORE_HOME/bin:$PATH ```sh wget https://github.com/sstsimulator/sst-elements/releases/download/v13.0.0_Final/sstelements-13.0.0.tar.gz -tar xzf sstelements-13.0.0.tar.gz +tar xvf sstelements-13.0.0.tar.gz ``` ### Installing SST-Elements ```sh cd sst-elements-library-13.0.0 +cd sst-elements-library-13.0.0 ./configure --prefix=$SST_CORE_HOME --with-python=/usr/bin/python3-config \ --with-sst-core=$SST_CORE_HOME make all -j$(nproc) @@ -59,8 +64,7 @@ echo "export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:$SST_CORE_HOME/lib/pkgconfig/" >> ### Building gem5 library At the root of the gem5 folder, you need to compile gem5 as a library. 
This -varies dependent on which OS you are using. If you're using Linux, then -execute the following: +varies which OS you use. If you're using Linux, then type the following: ```sh scons build/RISCV/libgem5_opt.so -j $(nproc) --without-tcmalloc --duplicate-sources ``` @@ -68,7 +72,15 @@ In case you're using Mac, then type the following: ```sh scons build/RISCV/libgem5_opt.dylib -j $(nproc) --without-tcmalloc --duplicate-sources ``` +In case you're using Mac, then type the following: +```sh +scons build/RISCV/libgem5_opt.dylib -j $(nproc) --without-tcmalloc --duplicate-sources +``` +**Note:** +* `--without-tcmalloc` is required to avoid a conflict with SST's malloc. +* `--duplicate-sources` is required as the compilation of SST depends on sources to be present in the "build" directory. +* The Mac version was tested on a Macbook Air with M2 processor. **Note:** * `--without-tcmalloc` is required to avoid a conflict with SST's malloc. * `--duplicate-sources` is required as the compilation of SST depends on sources to be present in the "build" directory. @@ -76,18 +88,18 @@ scons build/RISCV/libgem5_opt.dylib -j $(nproc) --without-tcmalloc --duplicate-s ### Compiling the SST integration +Go to the SST directory in the gem5 repo. Go to the SST directory in the gem5 repo. ```sh cd ext/sst ``` -Depending on your OS, you need to copy the correct `Makefile.xxx` file to -`Makefile`. +According to the OS that you're using, you need to rename the `Makefile.xxx` to `Makefile`. ```sh cp Makefile.xxx Makefile # linux or mac make -j4 ``` -The make file is hardcoded to RISC-V. IN the case you wish to compile to ARM, -edit the Makefile or pass `ARCH=RISCV` to `ARCH=ARM` while compiling. +Change `ARCH=RISCV` to `ARCH=ARM` in the `Makefile` in case you're compiling +for ARM. 
### Running an example simulation See `README.md` diff --git a/ext/sst/sst/arm_example.py b/ext/sst/sst/arm_example.py index 4bc111cb86..9e57e3afa4 100644 --- a/ext/sst/sst/arm_example.py +++ b/ext/sst/sst/arm_example.py @@ -35,6 +35,18 @@ # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Copyright (c) 2021 Arm Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. import sst import sys From 0cfb5f52054c2213267075b837eb45822ae75b01 Mon Sep 17 00:00:00 2001 From: Kaustav Goswami Date: Mon, 30 Oct 2023 17:00:37 -0700 Subject: [PATCH 05/23] ext,configs: adding a disaggregated memory setup This change adds necessary tools to simulate multiple gem5 nodes to simulate a disaggregated memory setup using gem5 and SST.
Change-Id: I6e1e52d4ba8df7c161b3151c9b2c02b72fc7cc31 Signed-off-by: Kaustav Goswami --- .../arm_dm_sst_board.py | 220 ++++++++ disaggregated_memory_setup/arm_sst_board.py | 220 ++++++++ disaggregated_memory_setup/dm_caches.py | 126 +++++ disaggregated_memory_setup/dm_caches_arm.py | 122 +++++ .../mi_example_dm_caches.py | 191 +++++++ disaggregated_memory_setup/numa_config.py | 126 +++++ disaggregated_memory_setup/numa_config_sst.py | 131 +++++ .../numa_config_sst_nodes.py | 172 ++++++ .../numa_config_sst_nodes_arm.py | 139 +++++ .../numa_config_w_delay.py | 136 +++++ .../numa_config_w_ruby_caches.py | 160 ++++++ disaggregated_memory_setup/numa_config_x86.py | 129 +++++ disaggregated_memory_setup/remote_memory.py | 92 ++++ disaggregated_memory_setup/riscv_dm_board.py | 454 ++++++++++++++++ disaggregated_memory_setup/riscv_sst_board.py | 514 ++++++++++++++++++ .../simulator_project.ipynb | 159 ++++++ disaggregated_memory_setup/test_board_sst.py | 234 ++++++++ disaggregated_memory_setup/traffic_gen_sst.py | 136 +++++ disaggregated_memory_setup/x86_dm_board.py | 383 +++++++++++++ ext/sst/gem5.hh | 10 +- ext/sst/sst/example_board.py | 144 +++++ ext/sst/sst/example_board_x86.py | 144 +++++ ext/sst/sst/example_freq.py | 139 +++++ ext/sst/sst/example_nodes.py | 221 ++++++++ ext/sst/sst/example_nodes_multi_ISA.py | 234 ++++++++ ext/sst/sst/example_nodes_w_board.py | 206 +++++++ ext/sst/sst/example_nodes_w_board_arm.py | 201 +++++++ ext/sst/sst/example_nodes_w_board_full_mem.py | 204 +++++++ ext/sst/sst/example_traffic_gen.py | 218 ++++++++ ext/sst/sst_responder.cc | 2 + ext/sst/sst_responder_subcomponent.cc | 60 ++ ext/sst/translator.hh | 6 +- src/sst/outgoing_request_bridge.cc | 41 +- src/sst/outgoing_request_bridge.hh | 22 + 34 files changed, 5685 insertions(+), 11 deletions(-) create mode 100644 disaggregated_memory_setup/arm_dm_sst_board.py create mode 100644 disaggregated_memory_setup/arm_sst_board.py create mode 100644 disaggregated_memory_setup/dm_caches.py create 
mode 100644 disaggregated_memory_setup/dm_caches_arm.py create mode 100644 disaggregated_memory_setup/mi_example_dm_caches.py create mode 100644 disaggregated_memory_setup/numa_config.py create mode 100644 disaggregated_memory_setup/numa_config_sst.py create mode 100644 disaggregated_memory_setup/numa_config_sst_nodes.py create mode 100644 disaggregated_memory_setup/numa_config_sst_nodes_arm.py create mode 100644 disaggregated_memory_setup/numa_config_w_delay.py create mode 100644 disaggregated_memory_setup/numa_config_w_ruby_caches.py create mode 100644 disaggregated_memory_setup/numa_config_x86.py create mode 100644 disaggregated_memory_setup/remote_memory.py create mode 100644 disaggregated_memory_setup/riscv_dm_board.py create mode 100644 disaggregated_memory_setup/riscv_sst_board.py create mode 100644 disaggregated_memory_setup/simulator_project.ipynb create mode 100644 disaggregated_memory_setup/test_board_sst.py create mode 100644 disaggregated_memory_setup/traffic_gen_sst.py create mode 100644 disaggregated_memory_setup/x86_dm_board.py create mode 100644 ext/sst/sst/example_board.py create mode 100644 ext/sst/sst/example_board_x86.py create mode 100644 ext/sst/sst/example_freq.py create mode 100644 ext/sst/sst/example_nodes.py create mode 100644 ext/sst/sst/example_nodes_multi_ISA.py create mode 100644 ext/sst/sst/example_nodes_w_board.py create mode 100644 ext/sst/sst/example_nodes_w_board_arm.py create mode 100644 ext/sst/sst/example_nodes_w_board_full_mem.py create mode 100644 ext/sst/sst/example_traffic_gen.py diff --git a/disaggregated_memory_setup/arm_dm_sst_board.py b/disaggregated_memory_setup/arm_dm_sst_board.py new file mode 100644 index 0000000000..5ec6b28d90 --- /dev/null +++ b/disaggregated_memory_setup/arm_dm_sst_board.py @@ -0,0 +1,220 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +from m5.objects import ( + AddrRange, + VoltageDomain, + SrcClockDomain, + Terminal, + VncServer, + IOXBar, + BadAddr, + ArmSystem, +) + +from m5.objects.RealView import VExpress_GEM5_Base, VExpress_GEM5_Foundation +from m5.objects.ArmSystem import ArmRelease, ArmDefaultRelease +from m5.objects.ArmFsWorkload import ArmFsLinux + +from m5.util.fdthelper import ( + Fdt, + FdtNode, + FdtProperty, + FdtPropertyStrings, + FdtPropertyWords, + FdtState, +) + +import os +import m5 +from abc import ABCMeta +from gem5.components.boards.arm_board import ArmBoard +from gem5.components.processors.abstract_processor import AbstractProcessor +from gem5.components.memory.abstract_memory_system import AbstractMemorySystem +from gem5.components.cachehierarchies.abstract_cache_hierarchy import ( + AbstractCacheHierarchy, +) +from gem5.utils.override import overrides + +from typing import List, Sequence, Tuple + + +class ArmDMSSTBoard(ArmBoard): + __metaclass__ = ABCMeta + + def __init__( + self, + clk_freq: str, + processor: AbstractProcessor, + memory: AbstractMemorySystem, + cache_hierarchy: AbstractCacheHierarchy, + remote_memory_range: AddrRange, + platform: VExpress_GEM5_Base = VExpress_GEM5_Foundation(), + release: ArmRelease = ArmDefaultRelease(), + ) -> None: + + self._remote_memory_range = remote_memory_range + super().__init__( + clk_freq=clk_freq, + processor=processor, + memory=memory, + cache_hierarchy=cache_hierarchy, + platform=platform, + release=release, + ) + + def get_remote_memory_addr_range(self): + return self._remote_memory_range + + @overrides(ArmBoard) + def _setup_board(self) -> None: + + # This board is expected to run full-system simulation. + # Loading ArmFsLinux() from `src/arch/arm/ArmFsWorkload.py` + self.workload = ArmFsLinux() + + # We are fixing the following variable for the ArmSystem to work. The + # security extension is checked while generating the dtb file in + # realview. This board does not have security extension enabled. 
+ self._have_psci = False + + # highest_el_is_64 is set to True. True if the register width of the + # highest implemented exception level is 64 bits. + self.highest_el_is_64 = True + + # Setting up the voltage and the clock domain here for the ARM board. + # The ArmSystem/RealView expects voltage_domain to be a parameter. + # The voltage and the clock frequency are taken from the devices.py + # file from configs/example/arm. We set the clock to the same frequency + # as the user specified in the config script. + self.voltage_domain = VoltageDomain(voltage="1.0V") + self.clk_domain = SrcClockDomain( + clock=self._clk_freq, voltage_domain=self.voltage_domain + ) + + # The ARM board supports both Terminal and VncServer. + self.terminal = Terminal() + self.vncserver = VncServer() + + # Incoherent I/O Bus + self.iobus = IOXBar() + self.iobus.badaddr_responder = BadAddr() + self.iobus.default = self.iobus.badaddr_responder.pio + + # We now need to setup the dma_ports. + self._dma_ports = None + + # RealView sets up most of the on-chip and off-chip devices and GIC + # for the ARM board. These devices' information is also used to + # generate the dtb file. We then connect the I/O devices to the + # I/O bus. + self._setup_io_devices() + + # Once the realview is setup, we can continue setting up the memory + # ranges. ArmBoard's memory can only be setup once realview is + # initialized. + memory = self.get_memory() + mem_size = memory.get_size() + + # The following code is taken from configs/example/arm/devices.py. It + # sets up all the memory ranges for the board. 
+ self.mem_ranges = [] + success = False + # self.mem_ranges.append(self.get_remote_memory_addr_range()) + for mem_range in self.realview._mem_regions: + size_in_range = min(mem_size, mem_range.size()) + self.mem_ranges.append( + AddrRange(start=mem_range.start, size=size_in_range) + ) + + mem_size -= size_in_range + if mem_size == 0: + success = True + break + + if success: + memory.set_memory_range(self.mem_ranges) + else: + raise ValueError("Memory size too big for platform capabilities") + + self.mem_ranges.append(self.get_remote_memory_addr_range()) + + # The PCI Devices. PCI devices can be added via the `_add_pci_device` + # function. + self._pci_devices = [] + + @overrides(ArmSystem) + def generateDeviceTree(self, state): + # Generate a device tree root node for the system by creating the root + # node and adding the generated subnodes of all children. + # When a child needs to add multiple nodes, this is done by also + # creating a node called '/' which will then be merged with the + # root instead of appended. 
+ + def generateMemNode(numa_node_id, mem_range): + node = FdtNode(f"memory@{int(mem_range.start):x}") + node.append(FdtPropertyStrings("device_type", ["memory"])) + node.append( + FdtPropertyWords( + "reg", + state.addrCells(mem_range.start) + + state.sizeCells(mem_range.size()), + ) + ) + node.append(FdtPropertyWords("numa-node-id", [numa_node_id])) + return node + + root = FdtNode("/") + root.append(state.addrCellsProperty()) + root.append(state.sizeCellsProperty()) + + # Add memory nodes + for mem_range in self.mem_ranges: + root.append(generateMemNode(0, mem_range)) + root.append(generateMemNode(1, self.get_remote_memory_addr_range())) + + for node in self.recurseDeviceTree(state): + # Merge root nodes instead of adding them (for children + # that need to add multiple root level nodes) + if node.get_name() == root.get_name(): + root.merge(node) + else: + root.append(node) + + return root + + @overrides(ArmBoard) + def get_default_kernel_args(self) -> List[str]: + + # The default kernel string is taken from the devices.py file. + return [ + "console=ttyAMA0", + "lpj=19988480", + "norandmaps", + "root={root_value}", + "rw", + f"mem={self.get_memory().get_size()}", + ] diff --git a/disaggregated_memory_setup/arm_sst_board.py b/disaggregated_memory_setup/arm_sst_board.py new file mode 100644 index 0000000000..ae5385530b --- /dev/null +++ b/disaggregated_memory_setup/arm_sst_board.py @@ -0,0 +1,220 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 

from m5.objects import (
    AddrRange,
    VoltageDomain,
    SrcClockDomain,
    Terminal,
    VncServer,
    IOXBar,
    BadAddr,
    ArmSystem,
)

from m5.objects.RealView import VExpress_GEM5_Base, VExpress_GEM5_Foundation
from m5.objects.ArmSystem import ArmRelease, ArmDefaultRelease
from m5.objects.ArmFsWorkload import ArmFsLinux

from m5.util.fdthelper import (
    Fdt,
    FdtNode,
    FdtProperty,
    FdtPropertyStrings,
    FdtPropertyWords,
    FdtState,
)

import os
import m5
from abc import ABCMeta
from gem5.components.boards.arm_board import ArmBoard
from gem5.components.processors.abstract_processor import AbstractProcessor
from gem5.components.memory.abstract_memory_system import AbstractMemorySystem
from gem5.components.cachehierarchies.abstract_cache_hierarchy import (
    AbstractCacheHierarchy,
)
from gem5.utils.override import overrides

from typing import List, Sequence, Tuple


class ArmDMSSTBoard(ArmBoard):
    """ARM full-system board with an additional "remote" memory range.

    On top of the regular board setup, this class records a caller-supplied
    remote address range, appends it to the system's ``mem_ranges`` and
    advertises it in the generated device tree as NUMA node 1, while the
    local memory ranges are advertised as NUMA node 0.

    NOTE(review): judging by the file name, the remote range is meant to be
    served through the gem5/SST bridge rather than by a gem5 memory
    controller -- confirm.
    """

    # NOTE: a Python 2 idiom; Python 3 ignores this attribute.
    __metaclass__ = ABCMeta

    def __init__(
        self,
        clk_freq: str,
        processor: AbstractProcessor,
        memory: AbstractMemorySystem,
        cache_hierarchy: AbstractCacheHierarchy,
        remote_memory_range: AddrRange,
        platform: VExpress_GEM5_Base = VExpress_GEM5_Foundation(),
        release: ArmRelease = ArmDefaultRelease(),
    ) -> None:
        """
        :param clk_freq: Clock frequency of the board (e.g., "3GHz").
        :param processor: The processor of the board.
        :param memory: The local memory system of the board.
        :param cache_hierarchy: The cache hierarchy of the board.
        :param remote_memory_range: Address range of the remote memory. It
            is exposed to the guest as NUMA node 1.
        :param platform: The RealView platform to build the board on.
        :param release: The Arm release (set of architecture extensions).

        NOTE(review): the ``platform`` and ``release`` defaults are
        evaluated once, when the class is defined, so every board built
        without explicit values shares the same two objects.
        """
        # Stored before the parent constructor runs: `_setup_board` reads it
        # and is presumably invoked from the parent constructor -- confirm
        # against ArmBoard.
        self._remote_memory_range = remote_memory_range
        super().__init__(
            clk_freq=clk_freq,
            processor=processor,
            memory=memory,
            cache_hierarchy=cache_hierarchy,
            platform=platform,
            release=release,
        )

    def get_remote_memory_addr_range(self) -> AddrRange:
        """Return the remote memory address range given to the constructor."""
        return self._remote_memory_range

    @overrides(ArmBoard)
    def _setup_board(self) -> None:
        """Set up the workload, clock domain, I/O bus and memory ranges.

        :raises ValueError: If the local memory does not fit into the memory
            regions offered by the platform.
        """

        # This board is expected to run full-system simulation.
        # Loading ArmFsLinux() from `src/arch/arm/ArmFsWorkload.py`
        self.workload = ArmFsLinux()

        # We are fixing the following variable for the ArmSystem to work. The
        # security extension is checked while generating the dtb file in
        # realview. This board does not have security extension enabled.
        self._have_psci = False

        # highest_el_is_64 is set to True. True if the register width of the
        # highest implemented exception level is 64 bits.
        self.highest_el_is_64 = True

        # Setting up the voltage and the clock domain here for the ARM board.
        # The ArmSystem/RealView expects voltage_domain to be a parameter.
        # The voltage and the clock frequency are taken from the devices.py
        # file from configs/example/arm. We set the clock to the same frequency
        # as the user specified in the config script.
        self.voltage_domain = VoltageDomain(voltage="1.0V")
        self.clk_domain = SrcClockDomain(
            clock=self._clk_freq, voltage_domain=self.voltage_domain
        )

        # The ARM board supports both Terminal and VncServer.
        self.terminal = Terminal()
        self.vncserver = VncServer()

        # Incoherent I/O Bus
        self.iobus = IOXBar()
        self.iobus.badaddr_responder = BadAddr()
        self.iobus.default = self.iobus.badaddr_responder.pio

        # We now need to setup the dma_ports.
        self._dma_ports = None

        # RealView sets up most of the on-chip and off-chip devices and GIC
        # for the ARM board. These devices' information is also used to
        # generate the dtb file. We then connect the I/O devices to the
        # I/O bus.
        self._setup_io_devices()

        # Once the realview is setup, we can continue setting up the memory
        # ranges. ArmBoard's memory can only be setup once realview is
        # initialized.
        memory = self.get_memory()
        mem_size = memory.get_size()

        # The following code is taken from configs/example/arm/devices.py. It
        # fills the platform's memory regions, in order, until the whole
        # local memory has been placed.
        self.mem_ranges = []
        for mem_range in self.realview._mem_regions:
            size_in_range = min(mem_size, mem_range.size())
            self.mem_ranges.append(
                AddrRange(start=mem_range.start, size=size_in_range)
            )

            mem_size -= size_in_range
            if mem_size == 0:
                break
        else:
            # The regions were exhausted before all local memory was placed.
            raise ValueError("Memory size too big for platform capabilities")

        # The local memory system only gets the local ranges; the remote
        # range is appended to the system-wide list afterwards.
        memory.set_memory_range(self.mem_ranges)
        self.mem_ranges.append(self.get_remote_memory_addr_range())

        # The PCI Devices. PCI devices can be added via the `_add_pci_device`
        # function.
        self._pci_devices = []

    @overrides(ArmSystem)
    def generateDeviceTree(self, state: FdtState) -> FdtNode:
        """Generate the device tree root node for the system.

        The root node is created first and the generated subnodes of all
        children are added to it. When a child needs to add multiple root
        level nodes, it returns a node called '/' which is merged with the
        root instead of appended.

        :param state: Provides the address/size cell helpers for the tree.
        :returns: The root node of the device tree.
        """

        def generateMemNode(numa_node_id, mem_range):
            # One `memory@<start>` node carrying its NUMA node id.
            node = FdtNode(f"memory@{int(mem_range.start):x}")
            node.append(FdtPropertyStrings("device_type", ["memory"]))
            node.append(
                FdtPropertyWords(
                    "reg",
                    state.addrCells(mem_range.start)
                    + state.sizeCells(mem_range.size()),
                )
            )
            node.append(FdtPropertyWords("numa-node-id", [numa_node_id]))
            return node

        root = FdtNode("/")
        root.append(state.addrCellsProperty())
        root.append(state.sizeCellsProperty())

        # Add memory nodes. `_setup_board` appends the remote range to
        # `self.mem_ranges`, so it has to be skipped here: otherwise it
        # would be emitted twice under the same node name, first as NUMA
        # node 0 and then as NUMA node 1.
        remote_range = self.get_remote_memory_addr_range()
        remote_start = int(remote_range.start)
        for mem_range in self.mem_ranges:
            if int(mem_range.start) == remote_start:
                continue
            root.append(generateMemNode(0, mem_range))
        root.append(generateMemNode(1, remote_range))

        for node in self.recurseDeviceTree(state):
            # Merge root nodes instead of adding them (for children
            # that need to add multiple root level nodes)
            if node.get_name() == root.get_name():
                root.merge(node)
            else:
                root.append(node)

        return root

    @overrides(ArmBoard)
    def get_default_kernel_args(self) -> List[str]:
        """Return the default kernel command-line arguments.

        ``{root_value}`` is left as a placeholder in the returned string.
        """

        # The default kernel string is taken from the devices.py file.
        # NOTE(review): no `mem=<size>` argument is passed (it was disabled
        # in the original) -- presumably so that the kernel is not limited
        # to the local memory size; confirm.
        return [
            "console=ttyAMA0",
            "lpj=19988480",
            "norandmaps",
            "root={root_value}",
            "rw",
        ]
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 

from gem5.components.cachehierarchies.classic.private_l1_private_l2_cache_hierarchy import (
    PrivateL1PrivateL2CacheHierarchy,
)
from gem5.components.cachehierarchies.classic.caches.l1dcache import L1DCache
from gem5.components.cachehierarchies.classic.caches.l1icache import L1ICache
from gem5.components.cachehierarchies.classic.caches.l2cache import L2Cache
from gem5.components.cachehierarchies.classic.caches.mmu_cache import MMUCache
from gem5.components.boards.abstract_board import AbstractBoard
from gem5.isas import ISA
from m5.objects import Cache, L2XBar, BaseXBar, SystemXBar, BadAddr, Port

from gem5.utils.override import overrides


class ClassicPL1PL2DMCache(PrivateL1PrivateL2CacheHierarchy):
    """Private L1 / private L2 classic cache hierarchy for boards that split
    their memory into a local and a remote part.

    Only the controllers of the board's local memory are connected to the
    memory bus by this hierarchy.
    """

    def __init__(
        self,
        l1d_size: str,
        l1i_size: str,
        l2_size: str,
    ) -> None:
        """
        :param l1d_size: The size of the L1 Data Cache (e.g., "32kB").
        :type l1d_size: str
        :param l1i_size: The size of the L1 Instruction Cache (e.g., "32kB").
        :type l1i_size: str
        :param l2_size: The size of the L2 Cache (e.g., "256kB").
        :type l2_size: str
        """
        # Passed by keyword: this signature takes the data-cache size before
        # the instruction-cache size, so forwarding the values positionally
        # in (l1i, l1d) order would swap them in a parent that uses the same
        # (l1d, l1i) order.
        super().__init__(
            l1d_size=l1d_size, l1i_size=l1i_size, l2_size=l2_size
        )

    @overrides(PrivateL1PrivateL2CacheHierarchy)
    def incorporate_cache(self, board: AbstractBoard) -> None:
        """Create the per-core caches and wire them between the cores and the
        memory bus.

        :param board: The board this hierarchy is incorporated into. It must
            provide ``get_local_memory()``.
        """

        # Set up the system port for functional access from the simulator.
        board.connect_system_port(self.membus.cpu_side_ports)

        for cntr in board.get_local_memory().get_memory_controllers():
            cntr.port = self.membus.mem_side_ports

        # The remote memory controllers are not connected here (SST caches).
        # To serve the remote memory from gem5 instead, also connect the
        # controllers of `board.get_remote_memory()` to
        # `self.membus.mem_side_ports`.

        num_cores = board.get_processor().get_num_cores()

        self.l1icaches = [
            L1ICache(size=self._l1i_size) for _ in range(num_cores)
        ]
        self.l1dcaches = [
            L1DCache(size=self._l1d_size) for _ in range(num_cores)
        ]
        self.l2buses = [L2XBar() for _ in range(num_cores)]
        self.l2caches = [L2Cache(size=self._l2_size) for _ in range(num_cores)]
        # ITLB Page walk caches
        self.iptw_caches = [MMUCache(size="8KiB") for _ in range(num_cores)]
        # DTLB Page walk caches
        self.dptw_caches = [MMUCache(size="8KiB") for _ in range(num_cores)]

        if board.has_coherent_io():
            self._setup_io_cache(board)

        for i, cpu in enumerate(board.get_processor().get_cores()):
            l2bus = self.l2buses[i]

            cpu.connect_icache(self.l1icaches[i].cpu_side)
            cpu.connect_dcache(self.l1dcaches[i].cpu_side)

            # The L1 caches and both page-walk caches share the core's
            # private L2 bus.
            self.l1icaches[i].mem_side = l2bus.cpu_side_ports
            self.l1dcaches[i].mem_side = l2bus.cpu_side_ports
            self.iptw_caches[i].mem_side = l2bus.cpu_side_ports
            self.dptw_caches[i].mem_side = l2bus.cpu_side_ports

            l2bus.mem_side_ports = self.l2caches[i].cpu_side

            self.membus.cpu_side_ports = self.l2caches[i].mem_side

            cpu.connect_walker_ports(
                self.iptw_caches[i].cpu_side, self.dptw_caches[i].cpu_side
            )

            if board.get_processor().get_isa() == ISA.X86:
                int_req_port = self.membus.mem_side_ports
                int_resp_port = self.membus.cpu_side_ports
                cpu.connect_interrupt(int_req_port, int_resp_port)
            else:
                cpu.connect_interrupt()
b/disaggregated_memory_setup/dm_caches_arm.py new file mode 100644 index 0000000000..d732f7c357 --- /dev/null +++ b/disaggregated_memory_setup/dm_caches_arm.py @@ -0,0 +1,122 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 

from gem5.components.cachehierarchies.classic.private_l1_private_l2_cache_hierarchy import (
    PrivateL1PrivateL2CacheHierarchy,
)
from gem5.components.cachehierarchies.classic.caches.l1dcache import L1DCache
from gem5.components.cachehierarchies.classic.caches.l1icache import L1ICache
from gem5.components.cachehierarchies.classic.caches.l2cache import L2Cache
from gem5.components.cachehierarchies.classic.caches.mmu_cache import MMUCache
from gem5.components.boards.abstract_board import AbstractBoard
from gem5.isas import ISA
from m5.objects import Cache, L2XBar, BaseXBar, SystemXBar, BadAddr, Port

from gem5.utils.override import overrides


class ClassicPL1PL2DMCacheArm(PrivateL1PrivateL2CacheHierarchy):
    """Private L1 / private L2 classic cache hierarchy for the ARM board.

    The controllers returned by ``board.get_memory()`` are the only memory
    controllers connected to the memory bus by this hierarchy.
    """

    def __init__(
        self,
        l1d_size: str,
        l1i_size: str,
        l2_size: str,
    ) -> None:
        """
        :param l1d_size: The size of the L1 Data Cache (e.g., "32kB").
        :type l1d_size: str
        :param l1i_size: The size of the L1 Instruction Cache (e.g., "32kB").
        :type l1i_size: str
        :param l2_size: The size of the L2 Cache (e.g., "256kB").
        :type l2_size: str
        """
        # Passed by keyword: this signature takes the data-cache size before
        # the instruction-cache size, so forwarding the values positionally
        # in (l1i, l1d) order would swap them in a parent that uses the same
        # (l1d, l1i) order.
        super().__init__(
            l1d_size=l1d_size, l1i_size=l1i_size, l2_size=l2_size
        )

    @overrides(PrivateL1PrivateL2CacheHierarchy)
    def incorporate_cache(self, board: AbstractBoard) -> None:
        """Create the per-core caches and wire them between the cores and the
        memory bus.

        :param board: The board this hierarchy is incorporated into.
        """

        # Set up the system port for functional access from the simulator.
        board.connect_system_port(self.membus.cpu_side_ports)

        for cntr in board.get_memory().get_memory_controllers():
            cntr.port = self.membus.mem_side_ports

        num_cores = board.get_processor().get_num_cores()

        self.l1icaches = [
            L1ICache(size=self._l1i_size) for _ in range(num_cores)
        ]
        self.l1dcaches = [
            L1DCache(size=self._l1d_size) for _ in range(num_cores)
        ]
        self.l2buses = [L2XBar() for _ in range(num_cores)]
        self.l2caches = [L2Cache(size=self._l2_size) for _ in range(num_cores)]
        # ITLB Page walk caches
        self.iptw_caches = [MMUCache(size="8KiB") for _ in range(num_cores)]
        # DTLB Page walk caches
        self.dptw_caches = [MMUCache(size="8KiB") for _ in range(num_cores)]

        if board.has_coherent_io():
            self._setup_io_cache(board)

        for i, cpu in enumerate(board.get_processor().get_cores()):
            l2bus = self.l2buses[i]

            cpu.connect_icache(self.l1icaches[i].cpu_side)
            cpu.connect_dcache(self.l1dcaches[i].cpu_side)

            # The L1 caches and both page-walk caches share the core's
            # private L2 bus.
            self.l1icaches[i].mem_side = l2bus.cpu_side_ports
            self.l1dcaches[i].mem_side = l2bus.cpu_side_ports
            self.iptw_caches[i].mem_side = l2bus.cpu_side_ports
            self.dptw_caches[i].mem_side = l2bus.cpu_side_ports

            l2bus.mem_side_ports = self.l2caches[i].cpu_side

            self.membus.cpu_side_ports = self.l2caches[i].mem_side

            cpu.connect_walker_ports(
                self.iptw_caches[i].cpu_side, self.dptw_caches[i].cpu_side
            )

            if board.get_processor().get_isa() == ISA.X86:
                int_req_port = self.membus.mem_side_ports
                int_resp_port = self.membus.cpu_side_ports
                cpu.connect_interrupt(int_req_port, int_resp_port)
            else:
                cpu.connect_interrupt()
b/disaggregated_memory_setup/mi_example_dm_caches.py @@ -0,0 +1,191 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 

from gem5.components.cachehierarchies.ruby.mi_example_cache_hierarchy import (
    MIExampleCacheHierarchy,
)
from gem5.components.cachehierarchies.ruby.caches.mi_example.l1_cache import (
    L1Cache,
)
from gem5.components.cachehierarchies.ruby.caches.mi_example.dma_controller import (
    DMAController,
)
from gem5.components.cachehierarchies.ruby.caches.mi_example.directory import (
    Directory,
)
from gem5.components.cachehierarchies.ruby.topologies.simple_pt2pt import (
    SimplePt2Pt,
)

from gem5.components.cachehierarchies.abstract_cache_hierarchy import (
    AbstractCacheHierarchy,
)
from gem5.components.boards.abstract_board import AbstractBoard
from gem5.coherence_protocol import CoherenceProtocol
from gem5.isas import ISA
from gem5.utils.override import overrides
from gem5.utils.requires import requires
from m5.objects import RubySystem, RubySequencer, DMASequencer, RubyPortProxy


class MIExampleDMCache(MIExampleCacheHierarchy):
    """MI_example Ruby cache hierarchy for boards with local and remote
    memory.

    One directory controller is created for every port returned by the
    board's ``get_mem_ports()`` and for every port returned by its
    ``get_remote_mem_ports()``.
    """

    def __init__(self, size: str, assoc: str) -> None:
        """
        :param size: The size of each cache in the hierarchy.
        :param assoc: The associativity of each cache.
        """
        super().__init__(size, assoc)

    @overrides(MIExampleCacheHierarchy)
    def incorporate_cache(self, board: AbstractBoard) -> None:
        """Build the Ruby system and connect it to the cores, memory ports
        and DMA ports of ``board``.

        :param board: The board this hierarchy is incorporated into. It must
            provide ``get_remote_mem_ports()`` in addition to
            ``get_mem_ports()``.
        """

        requires(coherence_protocol_required=CoherenceProtocol.MI_EXAMPLE)

        self.ruby_system = RubySystem()

        # Ruby's global network.
        self.ruby_system.network = SimplePt2Pt(self.ruby_system)

        # MI Example uses 5 virtual networks.
        self.ruby_system.number_of_virtual_networks = 5
        self.ruby_system.network.number_of_virtual_networks = 5

        # There is a single global list of all of the controllers to make it
        # easier to connect everything to the global network. This can be
        # customized depending on the topology/network requirements.
        # Create one controller for each L1 cache (and the cache mem obj.)
        self._controllers = []
        for i, core in enumerate(board.get_processor().get_cores()):
            cache = L1Cache(
                size=self._size,
                assoc=self._assoc,
                network=self.ruby_system.network,
                core=core,
                cache_line_size=board.get_cache_line_size(),
                target_isa=board.get_processor().get_isa(),
                clk_domain=board.get_clock_domain(),
            )

            cache.sequencer = RubySequencer(
                version=i,
                dcache=cache.cacheMemory,
                clk_domain=cache.clk_domain,
            )

            if board.has_io_bus():
                cache.sequencer.connectIOPorts(board.get_io_bus())

            cache.ruby_system = self.ruby_system

            # Instruction fetches, data accesses and both page-table walkers
            # all go through the same sequencer.
            core.connect_icache(cache.sequencer.in_ports)
            core.connect_dcache(cache.sequencer.in_ports)

            core.connect_walker_ports(
                cache.sequencer.in_ports, cache.sequencer.in_ports
            )

            # Connect the interrupt ports
            if board.get_processor().get_isa() == ISA.X86:
                int_req_port = cache.sequencer.interrupt_out_port
                int_resp_port = cache.sequencer.in_ports
                core.connect_interrupt(int_req_port, int_resp_port)
            else:
                core.connect_interrupt()

            self._controllers.append(cache)

        # Create the directory controllers (really the memory controllers):
        # first for the local memory ports, then for the remote ones. Each
        # getter is only called when its turn comes.
        self._directory_controllers = []
        for get_ports in (board.get_mem_ports, board.get_remote_mem_ports):
            for addr_range, port in get_ports():
                directory = Directory(
                    self.ruby_system.network,
                    board.get_cache_line_size(),
                    addr_range,
                    port,
                )
                directory.ruby_system = self.ruby_system
                self._directory_controllers.append(directory)

        # Create the DMA Controllers, if required.
        self._dma_controllers = []
        if board.has_dma_ports():
            dma_ports = board.get_dma_ports()
            for i, port in enumerate(dma_ports):
                ctrl = DMAController(
                    self.ruby_system.network, board.get_cache_line_size()
                )
                ctrl.dma_sequencer = DMASequencer(version=i, in_ports=port)

                ctrl.ruby_system = self.ruby_system
                ctrl.dma_sequencer.ruby_system = self.ruby_system

                self._dma_controllers.append(ctrl)

        self.ruby_system.num_of_sequencers = len(self._controllers) + len(
            self._dma_controllers
        )

        # Connect the controllers.
        self.ruby_system.controllers = self._controllers
        self.ruby_system.directory_controllers = self._directory_controllers

        if self._dma_controllers:
            self.ruby_system.dma_controllers = self._dma_controllers

        self.ruby_system.network.connectControllers(
            self._controllers
            + self._directory_controllers
            + self._dma_controllers
        )
        self.ruby_system.network.setup_buffers()

        # Set up a proxy port for the system_port. Used for load binaries and
        # other functional-only things.
        self.ruby_system.sys_port_proxy = RubyPortProxy()
        board.connect_system_port(self.ruby_system.sys_port_proxy.in_ports)
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""
This script runs a full system RISCV simulation on a board that has both a
local and a remote memory (RiscvDMBoard), using the gem5 library. It uses
1 ATOMIC CPU core, a private L1/L2 cache hierarchy, and a custom kernel and
disk image that are loaded from hard-coded local paths.

Usage
-----

```
scons build/RISCV/gem5.opt
./build/RISCV/gem5.opt disaggregated_memory_setup/numa_config.py
```
"""

import m5
from m5.objects import Root

from gem5.utils.requires import requires
from riscv_dm_board import RiscvDMBoard
from dm_caches import ClassicPL1PL2DMCache
from gem5.components.memory import DualChannelDDR4_2400
from gem5.components.memory.multi_channel import *
from gem5.components.processors.simple_processor import SimpleProcessor
from gem5.components.processors.cpu_types import CPUTypes
from gem5.isas import ISA
from gem5.simulate.simulator import Simulator
from gem5.resources.workload import Workload
from gem5.resources.workload import *
from gem5.resources.resource import *

# This runs a check to ensure the gem5 binary is compiled for RISCV.

requires(isa_required=ISA.RISCV)

# Here we setup the parameters of the l1 and l2 caches.

cache_hierarchy = ClassicPL1PL2DMCache(
    l1d_size="2MB", l1i_size="2MB", l2_size="4MB"
)

# Memory: two Dual Channel DDR4 2400 DRAM devices, one used as the local
# memory and one used as the remote memory of the board.

local_memory = DualChannelDDR4_2400(size="64MiB")
remote_memory = DualChannelDDR4_2400(size="64MiB")

# remote_memory = DualChannelHBM_1000(size="4GB")

# Here we setup the processor. We use a simple processor with a single
# ATOMIC core.
processor = SimpleProcessor(
    cpu_type=CPUTypes.ATOMIC, isa=ISA.RISCV, num_cores=1
)

# Here we setup the board. The RiscvDMBoard takes a local and a remote
# memory in addition to the usual board components.
board = RiscvDMBoard(
    clk_freq="3GHz",
    processor=processor,
    local_memory=local_memory,
    remote_memory=remote_memory,
    cache_hierarchy=cache_hierarchy,
)

# Shell commands that are joined into a single string and handed to the
# workload as `readfile_contents`.
cmd = [
    "mount -t sysfs - /sys;",
    "mount -t proc - /proc;",
    "bin/bash;"
    # "m5 exit;"
]

# The kernel and the disk image are read from hard-coded absolute paths;
# adjust them to the local setup before running.
workload = CustomWorkload(
    function="set_kernel_disk_workload",
    parameters={
        "disk_image": DiskImageResource(
            # local_path=os.path.join(
            #     os.getcwd(), "/home/kaustavg/ubuntu-numa.img"
            # ),
            local_path="/home/kaustavg/disk-images/rv64gc-hpc-2204.img",
            root_partition="1",
        ),
        "kernel": CustomResource(
            "/scr/kaustavg/simulators-at-scratch/DArchR/WorkingDir/SST13/kernels/gem5-resources/src/riscv-boot-exit-nodisk/riscv-pk/build/bbl"
            # os.path.join(os.getcwd(), "/home/kaustavg/bbl")
        ),
        "readfile_contents": " ".join(cmd),
    },
)
# Debug output: echo the command string passed as `readfile_contents`.
print("______", " ".join(cmd))
# The stock "riscv-ubuntu-20.04-boot" workload is kept below for reference
# only; the custom workload defined above is the one that is actually set.
# board.set_workload(Workload("riscv-ubuntu-20.04-boot"))

# This disk image has NUMA tools installed.
board.set_workload(workload)
simulator = Simulator(board=board)
# NOTE(review): run() is called twice; presumably the second call resumes
# the simulation after the first exit event -- confirm this is intended.
simulator.run()
simulator.run()
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""
This script builds a full system RISCV configuration on a RiscvSstBoard,
which has a local gem5 memory and a remote memory that is only described by
its size. It uses 1 TIMING CPU core, a private L1/L2 cache hierarchy, and a
custom kernel and disk image that are loaded from hard-coded local paths.

The script only creates the `Root` object; it does not start a simulation
itself.

NOTE(review): presumably the script is loaded by the gem5 component of SST,
which then instantiates and drives the simulation -- confirm.
"""

import m5
from m5.objects import Root, NoncoherentXBar

from gem5.utils.requires import requires
from riscv_dm_board import RiscvDMBoard
from riscv_sst_board import RiscvSstBoard
from dm_caches import ClassicPL1PL2DMCache
from gem5.components.memory import SingleChannelDDR4_2400, DualChannelDDR4_2400
from gem5.components.memory.multi_channel import *
from gem5.components.processors.simple_processor import SimpleProcessor
from gem5.components.processors.cpu_types import CPUTypes
from gem5.isas import ISA
from gem5.simulate.simulator import Simulator
from gem5.resources.workload import Workload
from gem5.resources.workload import *
from gem5.resources.resource import *

# This runs a check to ensure the gem5 binary is compiled for RISCV.

requires(isa_required=ISA.RISCV)

# Here we setup the parameters of the l1 and l2 caches.

cache_hierarchy = ClassicPL1PL2DMCache(
    l1d_size="4MB", l1i_size="4MB", l2_size="32MB"
)

# Memory: Single Channel DDR4 2400 DRAM device for the local memory.

local_memory = SingleChannelDDR4_2400(size="2GiB")
# This has to be an argument coming from SST's side.
remote_memory_size = "2GiB"

# Here we setup the processor. We use a simple processor with a single
# TIMING core.
processor = SimpleProcessor(
    cpu_type=CPUTypes.TIMING, isa=ISA.RISCV, num_cores=1
)

# Here we setup the board. The RiscvSstBoard takes the size of the remote
# memory instead of a memory system object.
board = RiscvSstBoard(
    clk_freq="3GHz",
    processor=processor,
    local_memory=local_memory,
    remote_memory_size=remote_memory_size,
    cache_hierarchy=cache_hierarchy,
)

# The kernel and the disk image are read from hard-coded absolute paths;
# adjust them to the local setup before running. The paths are given
# directly: joining the current working directory with an absolute path
# yields the absolute path unchanged, and `os` is not imported explicitly
# in this script.
workload = CustomWorkload(
    function="set_kernel_disk_workload",
    parameters={
        "disk_image": DiskImageResource(
            local_path="/home/kaustavg/ubuntu-numa.img",
            root_partition="1",
        ),
        "kernel": CustomResource("/home/kaustavg/bbl"),
    },
)

# This disk image has NUMA tools installed.
board.set_workload(workload)
# No Simulator object is created here; the board is finalized manually and
# attached to a Root.
board._pre_instantiate()
root = Root(full_system=True, system=board)
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +This script shows an example of running a full system RISCV Ubuntu boot +simulation using the gem5 library. This simulation boots Ubuntu 20.04 using +2 TIMING CPU cores. The simulation ends when the startup is completed +successfully. 
+ +Usage +----- + +``` +scons build/RISCV/gem5.opt +./build/RISCV/gem5.opt \ + configs/example/gem5_library/riscv-ubuntu-run.py +``` +""" + +import m5 +import argparse +from m5.objects import Root, NoncoherentXBar + +from gem5.utils.requires import requires +from riscv_dm_board import RiscvDMBoard +from riscv_sst_board import RiscvSstBoard +from dm_caches import ClassicPL1PL2DMCache +from gem5.components.memory import SingleChannelDDR4_2400, DualChannelDDR4_2400 +from gem5.components.memory.multi_channel import * +from gem5.components.processors.simple_processor import SimpleProcessor +from gem5.components.processors.cpu_types import CPUTypes +from gem5.isas import ISA +from gem5.simulate.simulator import Simulator +from gem5.resources.workload import Workload +from gem5.resources.workload import * +from gem5.resources.resource import * + +# This runs a check to ensure the gem5 binary is compiled for RISCV. + +requires(isa_required=ISA.RISCV) + +parser = argparse.ArgumentParser( + description="An example configuration script to run multiple gem5 nodes." +) + +parser.add_argument( + "--cpu-clock-rate", + type=str, + required=True, + help="CPU clock rate. e.g. 3GHz etc", +) + +parser.add_argument( + "--instance", + type=int, + required=True, + help="Gem5 node instance", +) +args = parser.parse_args() +# Here we setup the parameters of the l1 and l2 caches. + +cache_hierarchy = ClassicPL1PL2DMCache( + l1d_size="128KiB", l1i_size="128KiB", l2_size="256KiB" +) + +# Memory: Dual Channel DDR4 2400 DRAM device. +# local_memory_size = str(2 * (int(args.instance) + 1)) + "GiB" +local_memory_size = "2GiB" +local_memory = SingleChannelDDR4_2400(size=local_memory_size) +# This has to be an argument coming from SST's side. +remote_memory_size = "2GiB" +print(local_memory_size) + +# remote_memory = DualChannelDDR4_2400(size="4GB") + +# remote_memory = DualChannelHBM_1000(size="4GB") + +# Here we setup the processor. We use a simple processor. 
+processor = SimpleProcessor(cpu_type=CPUTypes.O3, isa=ISA.RISCV, num_cores=1) +# processor = SimpleSwitchableProcessor( +# first_cpu_type=CPUTypes.O3, +# isa=ISA.RISCV, num_cores=1 +# ) +# Here we set up the board. The RiscvSstBoard allows for Full-System RISCV +# simulations. +board = RiscvSstBoard( + clk_freq=args.cpu_clock_rate, + processor=processor, + local_memory=local_memory, + remote_memory_size=remote_memory_size, + cache_hierarchy=cache_hierarchy, + instance=args.instance, +) + +cmd = [ + "mount -t sysfs - /sys;", + "mount -t proc - /proc;", + "numastat;", + "m5 dumpresetstats 0 ;", + "numactl --cpubind=0 --membind=0 -- /home/ubuntu/simple-vectorizable-microbenchmarks/stream-annotated/stream.hw 1000000;", + "m5 dumpresetstats 0;", + "numactl --cpubind=0 --membind=0,1 -- /home/ubuntu/simple-vectorizable-microbenchmarks/stream-annotated/stream.hw 1000000;", + "m5 dumpresetstats 0;", + "numactl --cpubind=0 --membind=1 -- /home/ubuntu/simple-vectorizable-microbenchmarks/stream-annotated/stream.hw 1000000;", + "m5 dumpresetstats 0;", + "m5 exit;", +] + +workload = CustomWorkload( + function="set_kernel_disk_workload", + parameters={ + "disk_image": DiskImageResource( + # CustomDiskImageResource( + # local_path=os.path.join( + # os.getcwd(), "/home/kaustavg/ubuntu-numa.img" + # "/home/kaustavg/disk-images/rv64gc-hpc-2204.img" + # ), + local_path="/home/kaustavg/disk-images/rv64gc-hpc-2204.img", + root_partition="1", + ), + "kernel": CustomResource( + os.path.join(os.getcwd(), "/home/kaustavg/bbl") + ), + "readfile_contents": " ".join(cmd), + }, +) + +# board.cache_hierarchy.membus = NoncoherentXBar( +# frontend_latency=0, +# forward_latency=0, +# response_latency=0, +# header_latency=0, +# width=64, +# ) + +# Alternative full system workload: "riscv-ubuntu-20.04-boot" which boots +# Ubuntu 20.04. Once the system successfully boots it encounters an `m5_exit` +# instruction which stops the simulation. 
When the simulation has ended you may +# inspect `m5out/system.pc.com_1.device` to see the stdout. +# board.set_workload(Workload("riscv-ubuntu-20.04-boot")) + +# This disk image has NUMA tools installed. +board.set_workload(workload) +board._pre_instantiate() +root = Root(full_system=True, system=board) +m5.instantiate() +# simulator = Simulator(board=board) +# simulator.run() +# simulator.run() diff --git a/disaggregated_memory_setup/numa_config_sst_nodes_arm.py b/disaggregated_memory_setup/numa_config_sst_nodes_arm.py new file mode 100644 index 0000000000..8a255e6c3f --- /dev/null +++ b/disaggregated_memory_setup/numa_config_sst_nodes_arm.py @@ -0,0 +1,139 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import m5 +from m5.objects import Root, OutgoingRequestBridge, AddrRange + +from gem5.utils.requires import requires +from arm_dm_sst_board import ArmDMSSTBoard +from dm_caches_arm import ClassicPL1PL2DMCacheArm +from gem5.components.memory import DualChannelDDR4_2400 +from gem5.components.memory.multi_channel import * +from gem5.components.processors.simple_processor import SimpleProcessor +from gem5.components.processors.cpu_types import CPUTypes +from gem5.isas import ISA +from gem5.simulate.simulator import Simulator +from gem5.resources.workload import Workload +from gem5.resources.workload import * +from gem5.resources.resource import * +from gem5.utils.override import overrides +import argparse + +parser = argparse.ArgumentParser() +parser.add_argument("--command", type=str, help="Command run by guest") +parser.add_argument( + "--cpu-type", + type=str, + choices=["atomic", "timing"], + default="atomic", + help="CPU type", +) +parser.add_argument( + "--cpu-clock-rate", + type=str, + required=True, + help="CPU Clock", +) +parser.add_argument( + "--remote-memory-range", + type=str, + # default="2GiB", + required=True, + help="Remote memory range", +) +parser.add_argument( + "--local-memory-range", + type=str, + # default="2GiB", + required=True, + help="Local memory range", +) +args = parser.parse_args() +command = args.command +remote_memory_range = list(map(int, args.remote_memory_range.split(","))) +remote_memory_range = 
AddrRange(remote_memory_range[0], remote_memory_range[1]) +print(remote_memory_range) +requires(isa_required=ISA.ARM) + +cache_hierarchy = ClassicPL1PL2DMCache( + l1d_size="128KiB", l1i_size="128KiB", l2_size="256KiB" +) + +local_memory = DualChannelDDR4_2400(size=args.local_memory_range) + +cpu_type = {"atomic": CPUTypes.ATOMIC, "timing": CPUTypes.TIMING}[ + args.cpu_type +] +processor = SimpleProcessor(cpu_type=cpu_type, isa=ISA.ARM, num_cores=1) + + +class MyBoard(ArmDMSSTBoard): + @overrides(ArmDMSSTBoard) + def _pre_instantiate(self): + super()._pre_instantiate() + self.remote_memory_outgoing_bridge = OutgoingRequestBridge() + self.remote_memory_outgoing_bridge.physical_address_ranges = [ + self.get_remote_memory_addr_range() + ] + self.remote_memory_outgoing_bridge.port = ( + self.cache_hierarchy.membus.mem_side_ports + ) + + @overrides(ArmDMSSTBoard) + def get_default_kernel_args(self): + return [ + "root=/dev/vda1", + "init=/root/gem5-init.sh", + "console=ttyAMA0", + "lpj=19988480", + "norandmaps", + "rw", + # f"mem={self.get_memory().get_size()}", + ] + + +board = MyBoard( + clk_freq=args.cpu_clock_rate, + processor=processor, + memory=local_memory, + remote_memory_range=remote_memory_range, + cache_hierarchy=cache_hierarchy, +) + +board.set_kernel_disk_workload( + kernel=CustomResource("/home/kaustavg/vmlinux-5.4.49-NUMA.arm64"), + bootloader=Resource("arm64-bootloader-foundation"), + disk_image=DiskImageResource( + "/projects/gem5/hn/DISK_IMAGES/arm64sve-hpc-2204-20230526-numa.img" + ), + readfile_contents=f"mount -t sysfs - /sys; mount -t proc - /proc; {command};", +) + +board._pre_instantiate() +root = Root(full_system=True, system=board) + +# simulator = Simulator(board=board) +# simulator._instantiate() diff --git a/disaggregated_memory_setup/numa_config_w_delay.py b/disaggregated_memory_setup/numa_config_w_delay.py new file mode 100644 index 0000000000..1af6392f96 --- /dev/null +++ b/disaggregated_memory_setup/numa_config_w_delay.py @@ -0,0 +1,136 
@@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +This script shows an example of running a full system RISCV Ubuntu boot +simulation using the gem5 library. This simulation boots Ubuntu 20.04 using +2 TIMING CPU cores. The simulation ends when the startup is completed +successfully. 
+ +Usage +----- + +``` +scons build/RISCV/gem5.opt +./build/RISCV/gem5.opt \ + configs/example/gem5_library/riscv-ubuntu-run.py +``` +""" + +import m5 +from m5.objects import Root + +from gem5.utils.requires import requires +from riscv_dm_board import RiscvDMBoard +from dm_caches import ClassicPL1PL2DMCache +from gem5.components.memory import DualChannelDDR4_2400 +from gem5.components.memory.multi_channel import * +from gem5.components.processors.simple_processor import SimpleProcessor +from gem5.components.processors.cpu_types import CPUTypes +from gem5.isas import ISA +from gem5.simulate.simulator import Simulator +from gem5.resources.workload import Workload +from gem5.resources.workload import * +from gem5.resources.resource import * + +from gem5.components.memory.abstract_memory_system import AbstractMemorySystem +from gem5.components.memory.dram_interfaces.ddr4 import DDR4_2400_8x8 +from remote_memory import RemoteChanneledMemory + +# defining a new type of memory with latency added. +def RemoteDualChannelDDR4_2400( + size: Optional[str] = None, remote_offset_latency=300 +) -> AbstractMemorySystem: + """ + A dual channel memory system using DDR4_2400_8x8 based DIMM + """ + return RemoteChanneledMemory( + DDR4_2400_8x8, + 2, + 64, + size=size, + remote_offset_latency=remote_offset_latency, + ) + + +# This runs a check to ensure the gem5 binary is compiled for RISCV. + +requires(isa_required=ISA.RISCV) + +# With RISCV, we use simple caches. + +cache_hierarchy = ClassicPL1PL2DMCache( + l1d_size="16kB", l1i_size="16kB", l2_size="256kB" +) + +# Local memory: Dual Channel DDR4 2400 DRAM device with no delay. +# Remote memory: Dual Channel DDR4 2400 DRAM device with 750 clocks (250 ns). +# 250 ns is taken from the TPP paper. + +local_memory = DualChannelDDR4_2400(size="512MB") +remote_memory = RemoteDualChannelDDR4_2400( + size="2GB", remote_offset_latency=750 +) + +# Here we setup the processor. We use a simple processor. 
+processor = SimpleProcessor( + cpu_type=CPUTypes.TIMING, isa=ISA.RISCV, num_cores=2 +) + +# Here we set up the board. The RiscvDMBoard allows for Full-System RISCV +# simulations. +board = RiscvDMBoard( + clk_freq="3GHz", + processor=processor, + local_memory=local_memory, + remote_memory=remote_memory, + cache_hierarchy=cache_hierarchy, +) + +workload = CustomWorkload( + function="set_kernel_disk_workload", + parameters={ + "disk_image": CustomDiskImageResource( + local_path=os.path.join( + os.getcwd(), "/home/kaustavg/disk-images/rv64gc-hpc-2204.img" + ), + disk_root_partition="1", + ), + "kernel": CustomResource( + os.path.join(os.getcwd(), "/home/kaustavg/bbl") + ), + }, +) + +# Alternative full system workload: "riscv-ubuntu-20.04-boot" which boots +# Ubuntu 20.04. Once the system successfully boots it encounters an `m5_exit` +# instruction which stops the simulation. When the simulation has ended you may +# inspect `m5out/system.pc.com_1.device` to see the stdout. +# board.set_workload(Workload("riscv-ubuntu-20.04-boot")) +board.set_workload(workload) +simulator = Simulator(board=board) +simulator.run() +simulator.run() diff --git a/disaggregated_memory_setup/numa_config_w_ruby_caches.py b/disaggregated_memory_setup/numa_config_w_ruby_caches.py new file mode 100644 index 0000000000..e3026d0951 --- /dev/null +++ b/disaggregated_memory_setup/numa_config_w_ruby_caches.py @@ -0,0 +1,160 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +This script shows an example of running a full system RISCV Ubuntu boot +simulation using the gem5 library. This simulation boots Ubuntu 20.04 using +2 TIMING CPU cores. The simulation ends when the startup is completed +successfully. 
+ +Usage +----- + +``` +scons build/RISCV/gem5.opt +./build/RISCV/gem5.opt \ + configs/example/gem5_library/riscv-ubuntu-run.py +``` +""" + +import m5 +from m5.objects import Root + +from gem5.utils.requires import requires +from riscv_dm_board import RiscvDMBoard +from dm_caches import ClassicPL1PL2DMCache +from mi_example_dm_caches import MIExampleDMCache +from gem5.components.memory import DualChannelDDR4_2400 +from gem5.components.memory.multi_channel import * +from gem5.components.processors.simple_processor import SimpleProcessor +from gem5.components.processors.cpu_types import CPUTypes +from gem5.isas import ISA +from gem5.simulate.simulator import Simulator +from gem5.resources.workload import Workload +from gem5.resources.workload import * +from gem5.resources.resource import * + +# This runs a check to ensure the gem5 binary is compiled for RISCV. + +requires(isa_required=ISA.RISCV) + +# With RISCV, we use simple caches. +# from gem5.components.cachehierarchies.classic.private_l1_private_l2_cache_hierarchy import ( +# PrivateL1PrivateL2CacheHierarchy, +# ) +## +# from gem5.components.cachehierarchies.classic.no_cache import NoCache + +# # Here we setup the parameters of the l1 and l2 caches. +# cache_hierarchy = PrivateL1PrivateL2CacheHierarchy( +# l1d_size="16kB", l1i_size="16kB", l2_size="256kB" +# ) +## +# from gem5.components.cachehierarchies.ruby.mi_example_cache_hierarchy import ( +# MIExampleCacheHierarchy +# ) + +# Here we setup the parameters of the l1 and l2 caches. +# cache_hierarchy = PrivateL1PrivateL2CacheHierarchy( +# l1d_size="16kB", l1i_size="16kB", l2_size="256kB" +# ) +# Here we setup the parameters of the l1 and l2 caches. 
+ +# cache_hierarchy = ClassicPL1PL2DMCache( +# l1d_size="16kB", l1i_size="16kB", l2_size="256kB" +# ) + +cache_hierarchy = MIExampleDMCache(size="256kB", assoc=8) + +# cache_hierarchy = MIExampleCacheHierarchy( +# size="16kB", assoc="8" +# ) + +# cache_hierarchy = NoCache() + +# Memory: Dual Channel DDR4 2400 DRAM device. + +# local_memory = DualChannelDDR4_2400(size="512MB") +local_memory = DualChannelDDR4_2400(size="512MB") +remote_memory = DualChannelDDR4_2400(size="2GB") + +# remote_memory = DualChannelHBM_1000(size="4GB") + +# Here we setup the processor. We use a simple processor. +processor = SimpleProcessor( + cpu_type=CPUTypes.TIMING, isa=ISA.RISCV, num_cores=4 +) + +# Here we setup the board. The RiscvBoard allows for Full-System RISCV +# simulations. +board = RiscvDMBoard( + clk_freq="3GHz", + processor=processor, + local_memory=local_memory, + remote_memory=remote_memory, + cache_hierarchy=cache_hierarchy, +) + +workload = CustomWorkload( + function="set_kernel_disk_workload", + parameters={ + # "disk_image" : DiskImageResource(os.path.join(os.getcwd(), + # "arm64-ubuntu-numa"), + # root_partition = "1"), + "disk_image": CustomDiskImageResource( + local_path=os.path.join( + os.getcwd(), + # "/home/kaustavg/ubuntu-numa.img"), + # "/home/kaustavg/ubuntu-numa-bench.img"), + "/home/kaustavg/disk-images/rv64gc-hpc-2204.img", + ), + # local_path = "/home/kaustavg/kernel/gem5-resources/src/riscv-ubuntu/disk-image/base/ubuntu-ML.img", + # "/scr/kaustavg/simulators-at-scratch/DArchR/WorkingDir/gem5-simpoint/gem5x/gem5/riscv-ubuntu-20221118.img"), + disk_root_partition="1", + ), + # root_partition = "1"), + "kernel": CustomResource( + os.path.join( + os.getcwd(), + # "x86-linux-kernel-5.4.49")) + # "/scr/kaustavg/simulators-at-scratch/DArchR/WorkingDir/gem5-simpoint/gem5x/gem5/riscv-bootloader-vmlinux-5.10")) + "/scr/kaustavg/simulators-at-scratch/DArchR/WorkingDir/gem5-simpoint/gem5x/gem5/bbl", + ) + ), + # "bootloader": CustomResource(os.path.join(os.getcwd(), 
+ # "vmlinux-5.4.49-NUMA.riscv")) + }, +) + +# Alternative full system workload: "riscv-ubuntu-20.04-boot" which boots +# Ubuntu 20.04. Once the system successfully boots it encounters an `m5_exit` +# instruction which stops the simulation. When the simulation has ended you may +# inspect `m5out/system.pc.com_1.device` to see the stdout. +# board.set_workload(Workload("riscv-ubuntu-20.04-boot")) +board.set_workload(workload) +simulator = Simulator(board=board) +simulator.run() +simulator.run() diff --git a/disaggregated_memory_setup/numa_config_x86.py b/disaggregated_memory_setup/numa_config_x86.py new file mode 100644 index 0000000000..e50534c03e --- /dev/null +++ b/disaggregated_memory_setup/numa_config_x86.py @@ -0,0 +1,129 @@ +# Copyright (c) 2021 The Regents of the University of California. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +Script to run GAPBS benchmarks with gem5. The script expects the +benchmark program and the simulation size to run. The input is in the format + +The system is fixed with 2 CPU cores, MESI Two Level system cache and 3 GB +DDR4 memory. It uses the x86 board. + +This script will count the total number of instructions executed +in the ROI. It also tracks how much wallclock and simulated time. + +Usage: +------ + +``` +scons build/X86/gem5.opt +./build/X86/gem5.opt \ + configs/example/gem5_library/x86-gabps-benchmarks.py \ + --benchmark \ + --synthetic \ + --size +``` +""" + +import argparse +import time +import sys + +import m5 +from m5.objects import Root + +from gem5.utils.requires import requires +from gem5.components.boards.x86_board import X86Board +from gem5.components.memory import DualChannelDDR4_2400, SingleChannelDDR4_2400 + +# from gem5.components.processors.simple_switchable_processor import ( +# SimpleSwitchableProcessor, +# ) +from x86_dm_board import X86DMBoard +from gem5.components.processors.simple_processor import SimpleProcessor +from gem5.components.processors.cpu_types import CPUTypes +from gem5.isas import ISA +from gem5.coherence_protocol import CoherenceProtocol +from gem5.resources.resource import obtain_resource +from gem5.simulate.simulator import Simulator +from gem5.simulate.exit_event import ExitEvent +from dm_caches import ClassicPL1PL2DMCache + +requires( + isa_required=ISA.X86, + kvm_required=True, +) + 
+# Here we set up the sizes of the L1 and L2 caches. + + +cache_hierarchy = ClassicPL1PL2DMCache( + l1d_size="2MB", l1i_size="2MB", l2_size="4MB" +) +# Local memory: Single Channel DDR4 2400 DRAM device. +# The X86 board only supports 3 GB of main memory. + +local_memory = SingleChannelDDR4_2400(size="1GB") +remote_mem_size = "1GiB" +# remote_memory = SingleChannelDDR4_2400(size="3GB") + +# Here we set up the processor. This is a plain `SimpleProcessor` with a +# single TIMING core, not a switchable processor (the +# `SimpleSwitchableProcessor` import above is commented out), so there is +# no `processor.switch()` step here: the OS boot and the workload both run +# on the TIMING core. Note that `requires()` above still asks for KVM +# support even though no KVM core is created. + +processor = SimpleProcessor(cpu_type=CPUTypes.TIMING, isa=ISA.X86, num_cores=1) + +# Here we set up the board. The X86DMBoard runs full-system X86 simulations + +board = X86DMBoard( + clk_freq="3GHz", + processor=processor, + memory=local_memory, + remote_memory_size=remote_mem_size, + # remote_memory=remote_memory, + cache_hierarchy=cache_hierarchy, +) + +# Here we set the FS workload: a kernel and a disk image to boot. +# After simulation has ended you may inspect +# `m5out/system.pc.com_1.device` to see the stdout, if any. + +board.set_kernel_disk_workload( + # The x86 linux kernel will be automatically downloaded to the + # `~/.cache/gem5` directory if not already present. + # gapbs benchmarks were tested with kernel version 4.19.83 + kernel=obtain_resource("x86-linux-kernel-4.19.83"), + # The disk image will be automatically downloaded to the + # `~/.cache/gem5` directory if not already present. 
+ disk_image=obtain_resource("x86-ubuntu-18.04-img"), + # readfile_contents=command, +) +board._pre_instantiate() +root = Root(full_system=True, system=board) +# simulator = Simulator(board=board) +# simulator.run() diff --git a/disaggregated_memory_setup/remote_memory.py b/disaggregated_memory_setup/remote_memory.py new file mode 100644 index 0000000000..cd3e1afc25 --- /dev/null +++ b/disaggregated_memory_setup/remote_memory.py @@ -0,0 +1,92 @@ +# Copyright (c) 2021 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" Channeled "generic" DDR memory controllers +""" + +import m5 +from gem5.utils.override import overrides +from m5.objects import AddrRange, DRAMInterface, MemCtrl, Port +from m5.objects.XBar import NoncoherentXBar +from typing import Type, Sequence, Tuple, Optional, Union + +from gem5.components.memory.memory import ChanneledMemory + + +class RemoteChanneledMemory(ChanneledMemory): + def __init__( + self, + dram_interface_class: Type[DRAMInterface], + num_channels: Union[int, str], + interleaving_size: Union[int, str], + size: Optional[str] = None, + addr_mapping: Optional[str] = None, + remote_offset_latency: Union[int, str] = 0, + ) -> None: + self._remote_latency = remote_offset_latency + super().__init__( + dram_interface_class, + num_channels, + interleaving_size, + size, + addr_mapping, + ) + + @overrides(ChanneledMemory) + def _create_mem_interfaces_controller(self): + self._dram = [ + self._dram_class(addr_mapping=self._addr_mapping) + for _ in range(self._num_channels) + ] + self.remote_links = [ + NoncoherentXBar( + frontend_latency=self._remote_latency, + forward_latency=0, + response_latency=0, + width=8, + ) + for _ in range(self._num_channels) + ] + self.mem_ctrl = [ + MemCtrl( + dram=self._dram[i], port=self.remote_links[i].mem_side_ports + ) + for i in range(self._num_channels) + ] + + @overrides(ChanneledMemory) + def get_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]: + return [ + (self.mem_ctrl[i].dram.range, 
self.remote_links[i].cpu_side_ports) + for i in range(self._num_channels) + ] + + @overrides(ChanneledMemory) + def get_memory_controllers(self): + return [ + (self.remote_links[i].cpu_side_ports) + for i in range(self._num_channels) + ] diff --git a/disaggregated_memory_setup/riscv_dm_board.py b/disaggregated_memory_setup/riscv_dm_board.py new file mode 100644 index 0000000000..1a7acb909c --- /dev/null +++ b/disaggregated_memory_setup/riscv_dm_board.py @@ -0,0 +1,454 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
class RiscvDMBoard(RiscvBoard):
    """
    A board capable of full system simulation for RISC-V with two memory
    systems: a local one and a remote (disaggregated) one.

    At a high-level, this is based on the HiFive Unmatched board from SiFive.
    This board assumes that you will be booting Linux. The local memory is
    mapped at 0x80000000 and the remote memory directly after it; each range
    is exposed to the OS as its own NUMA node in the generated device tree.

    **Limitations**
    * Only works with classic caches
    """

    def __init__(
        self,
        clk_freq: str,
        processor: AbstractProcessor,
        local_memory: AbstractMemorySystem,
        remote_memory: AbstractMemorySystem,
        cache_hierarchy: AbstractCacheHierarchy,
    ) -> None:
        """
        :param clk_freq: Clock frequency, forwarded to `RiscvBoard`.
        :param processor: The processor; must use the RISC-V ISA.
        :param local_memory: Memory system mapped at 0x80000000.
        :param remote_memory: Memory system mapped right after the local one.
        :param cache_hierarchy: The cache hierarchy, forwarded to
            `RiscvBoard`.
        :raises Exception: If the processor's ISA is not RISC-V.
        """
        # These are stored before calling the parent constructor because the
        # overridden hooks below (e.g. `_setup_memory_ranges`) read them;
        # presumably the base constructor invokes those hooks -- confirm
        # against `AbstractBoard.__init__`.
        self._localMemory = local_memory
        self._remoteMemory = remote_memory
        super().__init__(
            clk_freq=clk_freq,
            processor=processor,
            memory=local_memory,
            cache_hierarchy=cache_hierarchy,
        )
        self.local_memory = local_memory
        self.remote_memory = remote_memory

        if processor.get_isa() != ISA.RISCV:
            raise Exception(
                "The RISCVBoard requires a processor using the "
                "RISCV ISA. Current processor ISA: "
                f"'{processor.get_isa().name}'."
            )

    @overrides(AbstractSystemBoard)
    def get_memory(self) -> "AbstractMemorySystem":
        """Not supported on this board.

        The board has two memory systems, so callers must use
        `get_local_memory` or `get_remote_memory` instead.

        :raises NotImplementedError: Always.
        """
        raise NotImplementedError

    def get_local_memory(self) -> AbstractMemorySystem:
        """Get the local memory (RAM) connected to the board.

        :returns: The local memory system.
        """
        return self._localMemory

    def get_remote_memory(self) -> AbstractMemorySystem:
        """Get the remote memory (RAM) connected to the board.

        :returns: The remote memory system.
        """
        return self._remoteMemory

    @overrides(AbstractSystemBoard)
    def get_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
        """Return the `(range, port)` pairs of the local memory only."""
        return self.get_local_memory().get_mem_ports()

    def get_remote_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
        """Return the `(range, port)` pairs of the remote memory."""
        return self.get_remote_memory().get_mem_ports()

    @overrides(AbstractSystemBoard)
    def _setup_memory_ranges(self):
        """Assign address ranges to the local and the remote memory."""
        # the memory has to be setup for both the memory ranges. there is one
        # local memory range, close to the host machine and the other range is
        # pure memory, far from the host.
        local_memory = self.get_local_memory()
        remote_memory = self.get_remote_memory()

        local_mem_size = local_memory.get_size()
        remote_mem_size = remote_memory.get_size()

        self._local_mem_ranges = [
            AddrRange(start=0x80000000, size=local_mem_size)
        ]

        # The remote memory starts where the local memory ends. Therefore it
        # has to be offset by the local memory's size.
        self._remote_mem_ranges = [
            AddrRange(start=0x80000000 + local_mem_size, size=remote_mem_size)
        ]

        # using a _global_ memory range to keep a track of all the memory
        # ranges. This is used to generate the dtb for this machine
        self._global_mem_ranges = [
            self._local_mem_ranges[0],
            self._remote_mem_ranges[0],
        ]

        # setting the memory ranges for both of the memory ranges.
        local_memory.set_memory_range(self._local_mem_ranges)
        remote_memory.set_memory_range(self._remote_mem_ranges)

    @overrides(RiscvBoard)
    def generate_device_tree(self, outdir: str) -> None:
        """Creates the dtb and dts files.

        Creates two files in the outdir: 'device.dtb' and 'device.dts'. Every
        entry of `self._global_mem_ranges` becomes a memory node tagged with
        its own `numa-node-id`; the CPUs, the CLINT and the PLIC are all
        tagged with NUMA node 0.

        :param outdir: Directory to output the files
        """

        state = FdtState(addr_cells=2, size_cells=2, cpu_cells=1)
        root = FdtNode("/")
        root.append(state.addrCellsProperty())
        root.append(state.sizeCellsProperty())
        root.appendCompatible(["riscv-virtio"])

        for idx, mem_range in enumerate(self._global_mem_ranges):
            node = FdtNode("memory@%x" % int(mem_range.start))
            node.append(FdtPropertyStrings("device_type", ["memory"]))
            node.append(
                FdtPropertyWords(
                    "reg",
                    state.addrCells(mem_range.start)
                    + state.sizeCells(mem_range.size()),
                )
            )
            # adding the NUMA node information so that the OS can identify all
            # the NUMA ranges.
            node.append(FdtPropertyWords("numa-node-id", [idx]))
            root.append(node)

        # See Documentation/devicetree/bindings/riscv/cpus.txt for details.
        cpus_node = FdtNode("cpus")
        cpus_state = FdtState(addr_cells=1, size_cells=0)
        cpus_node.append(cpus_state.addrCellsProperty())
        cpus_node.append(cpus_state.sizeCellsProperty())
        # Used by the CLINT driver to set the timer frequency. Value taken from
        # RISC-V kernel docs (Note: freedom-u540 is actually 1MHz)
        cpus_node.append(FdtPropertyWords("timebase-frequency", [100000000]))

        for i, _ in enumerate(self.get_processor().get_cores()):
            node = FdtNode(f"cpu@{i}")
            node.append(FdtPropertyStrings("device_type", "cpu"))
            node.append(FdtPropertyWords("reg", state.CPUAddrCells(i)))
            # The CPUs are also associated to the NUMA nodes. All the CPUs are
            # bound to the first NUMA node.
            node.append(FdtPropertyWords("numa-node-id", [0]))
            node.append(FdtPropertyStrings("mmu-type", "riscv,sv48"))
            node.append(FdtPropertyStrings("status", "okay"))
            node.append(FdtPropertyStrings("riscv,isa", "rv64imafdc"))
            # TODO: Should probably get this from the core.
            freq = self.clk_domain.clock[0].frequency
            node.append(FdtPropertyWords("clock-frequency", freq))
            node.appendCompatible(["riscv"])
            # NOTE(review): this lookup looks redundant (the value is
            # overwritten below), but it presumably allocates the phandle of
            # the interrupt controller before the CPU's own phandle is
            # allocated. Kept so the phandle numbering does not change;
            # confirm against `FdtState.phandle`.
            int_phandle = state.phandle(f"cpu@{i}.int_state")
            node.appendPhandle(f"cpu@{i}")

            int_node = FdtNode("interrupt-controller")
            int_state = FdtState(interrupt_cells=1)
            int_phandle = int_state.phandle(f"cpu@{i}.int_state")
            int_node.append(int_state.interruptCellsProperty())
            int_node.append(FdtProperty("interrupt-controller"))
            int_node.appendCompatible("riscv,cpu-intc")
            int_node.append(FdtPropertyWords("phandle", [int_phandle]))

            node.append(int_node)
            cpus_node.append(node)

        root.append(cpus_node)

        soc_node = FdtNode("soc")
        soc_state = FdtState(addr_cells=2, size_cells=2)
        soc_node.append(soc_state.addrCellsProperty())
        soc_node.append(soc_state.sizeCellsProperty())
        soc_node.append(FdtProperty("ranges"))
        soc_node.appendCompatible(["simple-bus"])

        # CLINT node
        clint = self.platform.clint
        clint_node = clint.generateBasicPioDeviceNode(
            soc_state, "clint", clint.pio_addr, clint.pio_size
        )
        int_extended = list()
        for i, _ in enumerate(self.get_processor().get_cores()):
            phandle = soc_state.phandle(f"cpu@{i}.int_state")
            int_extended.append(phandle)
            int_extended.append(0x3)
            int_extended.append(phandle)
            int_extended.append(0x7)
        clint_node.append(
            FdtPropertyWords("interrupts-extended", int_extended)
        )
        # NUMA information is also associated with the CLINT controller.
        # In this board, the objective is to associate one NUMA node to the
        # CPUs and the other node with no CPUs. To generalize this, an
        # additional CLINT controller has to be created on this board, which
        # will make it completely NUMA, instead of just a disaggregated
        # NUMA-like board.
        clint_node.append(FdtPropertyWords("numa-node-id", [0]))
        clint_node.appendCompatible(["riscv,clint0"])
        soc_node.append(clint_node)

        # PLIC node
        plic = self.platform.plic
        plic_node = plic.generateBasicPioDeviceNode(
            soc_state, "plic", plic.pio_addr, plic.pio_size
        )

        int_state = FdtState(addr_cells=0, interrupt_cells=1)
        plic_node.append(int_state.addrCellsProperty())
        plic_node.append(int_state.interruptCellsProperty())

        phandle = int_state.phandle(plic)
        plic_node.append(FdtPropertyWords("phandle", [phandle]))
        # Similar to the CLINT interrupt controller, another PLIC controller is
        # required to make this board a general NUMA like board.
        plic_node.append(FdtPropertyWords("numa-node-id", [0]))
        plic_node.append(FdtPropertyWords("riscv,ndev", [plic.n_src - 1]))

        int_extended = list()
        for i, _ in enumerate(self.get_processor().get_cores()):
            phandle = state.phandle(f"cpu@{i}.int_state")
            int_extended.append(phandle)
            int_extended.append(0xB)
            int_extended.append(phandle)
            int_extended.append(0x9)

        plic_node.append(FdtPropertyWords("interrupts-extended", int_extended))
        plic_node.append(FdtProperty("interrupt-controller"))
        plic_node.appendCompatible(["riscv,plic0"])

        soc_node.append(plic_node)

        # PCI
        pci_state = FdtState(
            addr_cells=3, size_cells=2, cpu_cells=1, interrupt_cells=1
        )
        pci_node = FdtNode("pci")

        if int(self.platform.pci_host.conf_device_bits) == 8:
            pci_node.appendCompatible("pci-host-cam-generic")
        elif int(self.platform.pci_host.conf_device_bits) == 12:
            pci_node.appendCompatible("pci-host-ecam-generic")
        else:
            m5.fatal("No compatibility string for the set conf_device_width")

        pci_node.append(FdtPropertyStrings("device_type", ["pci"]))

        # Cell sizes of child nodes/peripherals
        pci_node.append(pci_state.addrCellsProperty())
        pci_node.append(pci_state.sizeCellsProperty())
        pci_node.append(pci_state.interruptCellsProperty())
        # PCI address for CPU
        pci_node.append(
            FdtPropertyWords(
                "reg",
                soc_state.addrCells(self.platform.pci_host.conf_base)
                + soc_state.sizeCells(self.platform.pci_host.conf_size),
            )
        )

        # Ranges mapping
        # For now some of this is hard coded, because the PCI module does not
        # have a proper full understanding of the memory map, but adapting the
        # PCI module is beyond the scope of what I'm trying to do here.
        # Values are taken from the ARM VExpress_GEM5_V1 platform.
        ranges = []
        # Pio address range
        ranges += self.platform.pci_host.pciFdtAddr(space=1, addr=0)
        ranges += soc_state.addrCells(self.platform.pci_host.pci_pio_base)
        ranges += pci_state.sizeCells(0x10000)  # Fixed size

        # AXI memory address range
        ranges += self.platform.pci_host.pciFdtAddr(space=2, addr=0)
        ranges += soc_state.addrCells(self.platform.pci_host.pci_mem_base)
        ranges += pci_state.sizeCells(0x40000000)  # Fixed size
        pci_node.append(FdtPropertyWords("ranges", ranges))

        # Interrupt mapping
        plic_handle = int_state.phandle(plic)
        int_base = self.platform.pci_host.int_base

        interrupts = []

        for i in range(int(self.platform.pci_host.int_count)):
            interrupts += self.platform.pci_host.pciFdtAddr(
                device=i, addr=0
            ) + [int(i) + 1, plic_handle, int(int_base) + i]

        pci_node.append(FdtPropertyWords("interrupt-map", interrupts))

        int_count = int(self.platform.pci_host.int_count)
        if int_count & (int_count - 1):
            # `fatal` is not imported as a bare name in this module; go
            # through `m5`, like the compatibility check above.
            m5.fatal("PCI interrupt count should be power of 2")

        intmask = self.platform.pci_host.pciFdtAddr(
            device=int_count - 1, addr=0
        ) + [0x0]
        pci_node.append(FdtPropertyWords("interrupt-map-mask", intmask))

        if self.platform.pci_host._dma_coherent:
            pci_node.append(FdtProperty("dma-coherent"))

        soc_node.append(pci_node)

        # UART node
        uart = self.platform.uart
        uart_node = uart.generateBasicPioDeviceNode(
            soc_state, "uart", uart.pio_addr, uart.pio_size
        )
        uart_node.append(
            FdtPropertyWords("interrupts", [self.platform.uart_int_id])
        )
        uart_node.append(FdtPropertyWords("clock-frequency", [0x384000]))
        uart_node.append(
            FdtPropertyWords("interrupt-parent", soc_state.phandle(plic))
        )
        uart_node.appendCompatible(["ns8250"])
        soc_node.append(uart_node)

        # VirtIO MMIO disk node
        disk = self.disk
        disk_node = disk.generateBasicPioDeviceNode(
            soc_state, "virtio_mmio", disk.pio_addr, disk.pio_size
        )
        disk_node.append(FdtPropertyWords("interrupts", [disk.interrupt_id]))
        disk_node.append(
            FdtPropertyWords("interrupt-parent", soc_state.phandle(plic))
        )
        disk_node.appendCompatible(["virtio,mmio"])
        soc_node.append(disk_node)

        # VirtIO MMIO rng node
        rng = self.rng
        rng_node = rng.generateBasicPioDeviceNode(
            soc_state, "virtio_mmio", rng.pio_addr, rng.pio_size
        )
        rng_node.append(FdtPropertyWords("interrupts", [rng.interrupt_id]))
        rng_node.append(
            FdtPropertyWords("interrupt-parent", soc_state.phandle(plic))
        )
        rng_node.appendCompatible(["virtio,mmio"])
        soc_node.append(rng_node)

        root.append(soc_node)

        fdt = Fdt()
        fdt.add_rootnode(root)
        fdt.writeDtsFile(os.path.join(outdir, "device.dts"))
        fdt.writeDtbFile(os.path.join(outdir, "device.dtb"))

    @overrides(KernelDiskWorkload)
    def get_default_kernel_args(self) -> List[str]:
        """Return the default kernel command line; init is `/bin/bash`."""
        return ["console=ttyS0", "root={root_value}", "init=/bin/bash", "rw"]

    @overrides(AbstractBoard)
    def _connect_things(self) -> None:
        """Connects all the components to the board.

        The order of this board is always:

        1. Connect the memory (local first, then remote).
        2. Connect the cache hierarchy.
        3. Connect the processor.

        Developers may build upon this assumption when creating components.

        Notes
        -----

        * The processor is incorporated after the cache hierarchy due to a bug
        noted here: https://gem5.atlassian.net/browse/GEM5-1113. Until this
        bug is fixed, this ordering must be maintained.
        * Once this function is called `_connect_things_called` *must* be set
        to `True`.

        :raises Exception: If this function has already been called.
        """

        if self._connect_things_called:
            raise Exception(
                "The `_connect_things` function has already been called."
            )

        # Incorporate the memory into the motherboard.
        self.get_local_memory().incorporate_memory(self)
        self.get_remote_memory().incorporate_memory(self)

        # Incorporate the cache hierarchy for the motherboard.
        if self.get_cache_hierarchy():
            self.get_cache_hierarchy().incorporate_cache(self)

        # Incorporate the processor into the motherboard.
        self.get_processor().incorporate_processor(self)

        self._connect_things_called = True

    @overrides(AbstractBoard)
    def _post_instantiate(self):
        """Called to set up anything needed after m5.instantiate"""
        self.get_processor()._post_instantiate()
        if self.get_cache_hierarchy():
            self.get_cache_hierarchy()._post_instantiate()
        self.get_local_memory()._post_instantiate()
        self.get_remote_memory()._post_instantiate()
class RiscvSstBoard(RiscvBoard):
    """
    A board capable of full system simulation for RISC-V whose remote memory
    is modelled outside gem5, on SST's side.

    At a high-level, this is based on the HiFive Unmatched board from SiFive.
    This board assumes that you will be booting Linux. On gem5's side the
    remote memory is only an `OutgoingRequestBridge` plus a size; the local
    memory is a regular gem5 memory system mapped at 0x80000000.

    **Limitations**
    * Only works with classic caches
    """

    def __init__(
        self,
        clk_freq: str,
        processor: AbstractProcessor,
        local_memory: AbstractMemorySystem,
        remote_memory_size: str,
        cache_hierarchy: AbstractCacheHierarchy,
        instance: int,
    ) -> None:
        """
        :param clk_freq: Clock frequency, forwarded to `RiscvBoard`.
        :param processor: The processor; must use the RISC-V ISA.
        :param local_memory: Memory system mapped at 0x80000000.
        :param remote_memory_size: Size of the remote memory, as a string.
        :param cache_hierarchy: The cache hierarchy, forwarded to
            `RiscvBoard`.
        :param instance: Index of this node; the remote range start is shifted
            by `instance * 0x80000000` (see `_setup_memory_ranges`).
        :raises Exception: If the processor's ISA is not RISC-V.
        """
        # These are stored before calling the parent constructor because the
        # overridden hooks below (e.g. `_setup_memory_ranges`) read them;
        # presumably the base constructor invokes those hooks -- confirm
        # against `AbstractBoard.__init__`.
        self._localMemory = local_memory
        # Since the remote memory is defined in SST's side, we only need the
        # size of this memory while setting up stuff from Gem5's side.
        self._remoteMemory = OutgoingRequestBridge()
        self._remoteMemorySize = remote_memory_size
        self._instanceCount = instance
        super().__init__(
            clk_freq=clk_freq,
            processor=processor,
            memory=local_memory,
            cache_hierarchy=cache_hierarchy,
        )
        self.local_memory = local_memory
        # Do not setup the remote memory here.
        self.remote_memory = self._remoteMemory

        if processor.get_isa() != ISA.RISCV:
            raise Exception(
                "The RISCVBoard requires a processor using the "
                "RISCV ISA. Current processor ISA: "
                f"'{processor.get_isa().name}'."
            )

    @overrides(AbstractSystemBoard)
    def get_memory(self) -> "AbstractMemorySystem":
        """Not supported on this board.

        The board has a local memory and a remote bridge, so callers must use
        `get_local_memory` or `get_remote_memory` instead.

        :raises NotImplementedError: Always.
        """
        raise NotImplementedError

    def get_local_memory(self) -> AbstractMemorySystem:
        """Get the local memory (RAM) connected to the board.

        :returns: The local memory system.
        """
        return self._localMemory

    def get_remote_memory(self) -> OutgoingRequestBridge:
        """Get the gem5-side stand-in for the remote memory.

        :returns: The `OutgoingRequestBridge` created in the constructor.
        """
        return self._remoteMemory

    @overrides(AbstractSystemBoard)
    def get_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
        """Return the `(range, port)` pairs of the local memory only."""
        return self.get_local_memory().get_mem_ports()

    def get_remote_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
        """Return a single `(address ranges, port)` pair for the bridge."""
        remote_memory = self.get_remote_memory()
        return [
            (
                remote_memory.physical_address_ranges,
                remote_memory.port,
            )
        ]

    @overrides(AbstractSystemBoard)
    def _setup_memory_ranges(self):
        """Assign address ranges to the local memory and the remote bridge."""
        # the memory has to be setup for both the memory ranges. there is one
        # local memory range, close to the host machine and the other range is
        # pure memory, far from the host.
        local_memory = self.get_local_memory()
        remote_memory = self.get_remote_memory()

        local_mem_size = local_memory.get_size()
        # This is a string
        remote_mem_size = self._remoteMemorySize

        self._local_mem_ranges = [
            AddrRange(start=0x80000000, size=local_mem_size)
        ]

        # The remote memory starts after the local memory, so it is offset by
        # the local memory's size. On top of that, a hole of
        # `instance * 0x80000000` is kept in the mem ranges to simulate
        # multiple nodes without using a translator simobject.
        remote_memory_start_addr = (
            0x80000000 + local_mem_size + self._instanceCount * 0x80000000
        )
        self._remote_mem_ranges = [
            AddrRange(start=remote_memory_start_addr, size=remote_mem_size)
        ]

        # using a _global_ memory range to keep a track of all the memory
        # ranges. This is used to generate the dtb for this machine
        self._global_mem_ranges = [
            self._local_mem_ranges[0],
            self._remote_mem_ranges[0],
        ]

        # setting the memory ranges for both of the memory ranges. The remote
        # side is a bridge, so its ranges are set through its
        # `physical_address_ranges` parameter instead of `set_memory_range`.
        local_memory.set_memory_range(self._local_mem_ranges)
        remote_memory.physical_address_ranges = self._remote_mem_ranges

    @overrides(RiscvBoard)
    def generate_device_tree(self, outdir: str) -> None:
        """Creates the dtb and dts files.

        Creates two files in the outdir: 'device.dtb' and 'device.dts'. Every
        entry of `self._global_mem_ranges` becomes a memory node tagged with
        its own `numa-node-id`; the CPUs, the CLINT and the PLIC are all
        tagged with NUMA node 0.

        :param outdir: Directory to output the files
        """

        state = FdtState(addr_cells=2, size_cells=2, cpu_cells=1)
        root = FdtNode("/")
        root.append(state.addrCellsProperty())
        root.append(state.sizeCellsProperty())
        root.appendCompatible(["riscv-virtio"])

        for idx, mem_range in enumerate(self._global_mem_ranges):
            node = FdtNode("memory@%x" % int(mem_range.start))
            node.append(FdtPropertyStrings("device_type", ["memory"]))
            node.append(
                FdtPropertyWords(
                    "reg",
                    state.addrCells(mem_range.start)
                    + state.sizeCells(mem_range.size()),
                )
            )
            # adding the NUMA node information so that the OS can identify all
            # the NUMA ranges.
            node.append(FdtPropertyWords("numa-node-id", [idx]))
            root.append(node)

        # See Documentation/devicetree/bindings/riscv/cpus.txt for details.
        cpus_node = FdtNode("cpus")
        cpus_state = FdtState(addr_cells=1, size_cells=0)
        cpus_node.append(cpus_state.addrCellsProperty())
        cpus_node.append(cpus_state.sizeCellsProperty())
        # Used by the CLINT driver to set the timer frequency. Value taken from
        # RISC-V kernel docs (Note: freedom-u540 is actually 1MHz)
        cpus_node.append(FdtPropertyWords("timebase-frequency", [100000000]))

        for i, _ in enumerate(self.get_processor().get_cores()):
            node = FdtNode(f"cpu@{i}")
            node.append(FdtPropertyStrings("device_type", "cpu"))
            node.append(FdtPropertyWords("reg", state.CPUAddrCells(i)))
            # The CPUs are also associated to the NUMA nodes. All the CPUs are
            # bound to the first NUMA node.
            node.append(FdtPropertyWords("numa-node-id", [0]))
            node.append(FdtPropertyStrings("mmu-type", "riscv,sv48"))
            node.append(FdtPropertyStrings("status", "okay"))
            node.append(FdtPropertyStrings("riscv,isa", "rv64imafdc"))
            # TODO: Should probably get this from the core.
            freq = self.clk_domain.clock[0].frequency
            node.append(FdtPropertyWords("clock-frequency", freq))
            node.appendCompatible(["riscv"])
            # NOTE(review): this lookup looks redundant (the value is
            # overwritten below), but it presumably allocates the phandle of
            # the interrupt controller before the CPU's own phandle is
            # allocated. Kept so the phandle numbering does not change;
            # confirm against `FdtState.phandle`.
            int_phandle = state.phandle(f"cpu@{i}.int_state")
            node.appendPhandle(f"cpu@{i}")

            int_node = FdtNode("interrupt-controller")
            int_state = FdtState(interrupt_cells=1)
            int_phandle = int_state.phandle(f"cpu@{i}.int_state")
            int_node.append(int_state.interruptCellsProperty())
            int_node.append(FdtProperty("interrupt-controller"))
            int_node.appendCompatible("riscv,cpu-intc")
            int_node.append(FdtPropertyWords("phandle", [int_phandle]))

            node.append(int_node)
            cpus_node.append(node)

        root.append(cpus_node)

        soc_node = FdtNode("soc")
        soc_state = FdtState(addr_cells=2, size_cells=2)
        soc_node.append(soc_state.addrCellsProperty())
        soc_node.append(soc_state.sizeCellsProperty())
        soc_node.append(FdtProperty("ranges"))
        soc_node.appendCompatible(["simple-bus"])

        # CLINT node
        clint = self.platform.clint
        clint_node = clint.generateBasicPioDeviceNode(
            soc_state, "clint", clint.pio_addr, clint.pio_size
        )
        int_extended = list()
        for i, _ in enumerate(self.get_processor().get_cores()):
            phandle = soc_state.phandle(f"cpu@{i}.int_state")
            int_extended.append(phandle)
            int_extended.append(0x3)
            int_extended.append(phandle)
            int_extended.append(0x7)
        clint_node.append(
            FdtPropertyWords("interrupts-extended", int_extended)
        )
        # NUMA information is also associated with the CLINT controller.
        # In this board, the objective is to associate one NUMA node to the
        # CPUs and the other node with no CPUs. To generalize this, an
        # additional CLINT controller has to be created on this board, which
        # will make it completely NUMA, instead of just a disaggregated
        # NUMA-like board.
        clint_node.append(FdtPropertyWords("numa-node-id", [0]))
        clint_node.appendCompatible(["riscv,clint0"])
        soc_node.append(clint_node)

        # PLIC node
        plic = self.platform.plic
        plic_node = plic.generateBasicPioDeviceNode(
            soc_state, "plic", plic.pio_addr, plic.pio_size
        )

        int_state = FdtState(addr_cells=0, interrupt_cells=1)
        plic_node.append(int_state.addrCellsProperty())
        plic_node.append(int_state.interruptCellsProperty())

        phandle = int_state.phandle(plic)
        plic_node.append(FdtPropertyWords("phandle", [phandle]))
        # Similar to the CLINT interrupt controller, another PLIC controller is
        # required to make this board a general NUMA like board.
        plic_node.append(FdtPropertyWords("numa-node-id", [0]))
        plic_node.append(FdtPropertyWords("riscv,ndev", [plic.n_src - 1]))

        int_extended = list()
        for i, _ in enumerate(self.get_processor().get_cores()):
            phandle = state.phandle(f"cpu@{i}.int_state")
            int_extended.append(phandle)
            int_extended.append(0xB)
            int_extended.append(phandle)
            int_extended.append(0x9)

        plic_node.append(FdtPropertyWords("interrupts-extended", int_extended))
        plic_node.append(FdtProperty("interrupt-controller"))
        plic_node.appendCompatible(["riscv,plic0"])

        soc_node.append(plic_node)

        # PCI
        pci_state = FdtState(
            addr_cells=3, size_cells=2, cpu_cells=1, interrupt_cells=1
        )
        pci_node = FdtNode("pci")

        if int(self.platform.pci_host.conf_device_bits) == 8:
            pci_node.appendCompatible("pci-host-cam-generic")
        elif int(self.platform.pci_host.conf_device_bits) == 12:
            pci_node.appendCompatible("pci-host-ecam-generic")
        else:
            m5.fatal("No compatibility string for the set conf_device_width")

        pci_node.append(FdtPropertyStrings("device_type", ["pci"]))

        # Cell sizes of child nodes/peripherals
        pci_node.append(pci_state.addrCellsProperty())
        pci_node.append(pci_state.sizeCellsProperty())
        pci_node.append(pci_state.interruptCellsProperty())
        # PCI address for CPU
        pci_node.append(
            FdtPropertyWords(
                "reg",
                soc_state.addrCells(self.platform.pci_host.conf_base)
                + soc_state.sizeCells(self.platform.pci_host.conf_size),
            )
        )

        # Ranges mapping
        # For now some of this is hard coded, because the PCI module does not
        # have a proper full understanding of the memory map, but adapting the
        # PCI module is beyond the scope of what I'm trying to do here.
        # Values are taken from the ARM VExpress_GEM5_V1 platform.
        ranges = []
        # Pio address range
        ranges += self.platform.pci_host.pciFdtAddr(space=1, addr=0)
        ranges += soc_state.addrCells(self.platform.pci_host.pci_pio_base)
        ranges += pci_state.sizeCells(0x10000)  # Fixed size

        # AXI memory address range
        ranges += self.platform.pci_host.pciFdtAddr(space=2, addr=0)
        ranges += soc_state.addrCells(self.platform.pci_host.pci_mem_base)
        ranges += pci_state.sizeCells(0x40000000)  # Fixed size
        pci_node.append(FdtPropertyWords("ranges", ranges))

        # Interrupt mapping
        plic_handle = int_state.phandle(plic)
        int_base = self.platform.pci_host.int_base

        interrupts = []

        for i in range(int(self.platform.pci_host.int_count)):
            interrupts += self.platform.pci_host.pciFdtAddr(
                device=i, addr=0
            ) + [int(i) + 1, plic_handle, int(int_base) + i]

        pci_node.append(FdtPropertyWords("interrupt-map", interrupts))

        int_count = int(self.platform.pci_host.int_count)
        if int_count & (int_count - 1):
            # `fatal` is not imported as a bare name in this module; go
            # through `m5`, like the compatibility check above.
            m5.fatal("PCI interrupt count should be power of 2")

        intmask = self.platform.pci_host.pciFdtAddr(
            device=int_count - 1, addr=0
        ) + [0x0]
        pci_node.append(FdtPropertyWords("interrupt-map-mask", intmask))

        if self.platform.pci_host._dma_coherent:
            pci_node.append(FdtProperty("dma-coherent"))

        soc_node.append(pci_node)

        # UART node
        uart = self.platform.uart
        uart_node = uart.generateBasicPioDeviceNode(
            soc_state, "uart", uart.pio_addr, uart.pio_size
        )
        uart_node.append(
            FdtPropertyWords("interrupts", [self.platform.uart_int_id])
        )
        uart_node.append(FdtPropertyWords("clock-frequency", [0x384000]))
        uart_node.append(
            FdtPropertyWords("interrupt-parent", soc_state.phandle(plic))
        )
        uart_node.appendCompatible(["ns8250"])
        soc_node.append(uart_node)

        # VirtIO MMIO disk node
        disk = self.disk
        disk_node = disk.generateBasicPioDeviceNode(
            soc_state, "virtio_mmio", disk.pio_addr, disk.pio_size
        )
        disk_node.append(FdtPropertyWords("interrupts", [disk.interrupt_id]))
        disk_node.append(
            FdtPropertyWords("interrupt-parent", soc_state.phandle(plic))
        )
        disk_node.appendCompatible(["virtio,mmio"])
        soc_node.append(disk_node)

        # VirtIO MMIO rng node
        rng = self.rng
        rng_node = rng.generateBasicPioDeviceNode(
            soc_state, "virtio_mmio", rng.pio_addr, rng.pio_size
        )
        rng_node.append(FdtPropertyWords("interrupts", [rng.interrupt_id]))
        rng_node.append(
            FdtPropertyWords("interrupt-parent", soc_state.phandle(plic))
        )
        rng_node.appendCompatible(["virtio,mmio"])
        soc_node.append(rng_node)

        root.append(soc_node)

        fdt = Fdt()
        fdt.add_rootnode(root)
        fdt.writeDtsFile(os.path.join(outdir, "device.dts"))
        fdt.writeDtbFile(os.path.join(outdir, "device.dtb"))

    @overrides(AbstractBoard)
    def _connect_things(self) -> None:
        """Connects all the components to the board.

        The order of this board is always:

        1. Connect the local memory, then hook the remote bridge to the
           cache hierarchy's `membus` through a `NoncoherentXBar`.
        2. Connect the cache hierarchy.
        3. Connect the processor.

        Developers may build upon this assumption when creating components.

        Notes
        -----

        * The processor is incorporated after the cache hierarchy due to a bug
        noted here: https://gem5.atlassian.net/browse/GEM5-1113. Until this
        bug is fixed, this ordering must be maintained.
        * Once this function is called `_connect_things_called` *must* be set
        to `True`.

        :raises Exception: If this function has already been called.
        """

        if self._connect_things_called:
            raise Exception(
                "The `_connect_things` function has already been called."
            )

        # Incorporate the memory into the motherboard.
        self.get_local_memory().incorporate_memory(self)

        # The remote bridge is not incorporated like a memory system.
        # Instead it is attached behind a crossbar whose only non-zero
        # latency is the response latency.
        self.remote_link = NoncoherentXBar(
            frontend_latency=0,
            forward_latency=0,
            response_latency=750,
            width=256,
        )
        self.get_remote_memory().port = self.remote_link.mem_side_ports
        # NOTE(review): this dereferences the cache hierarchy (and its
        # `membus`) unconditionally, so the `if` guard below can never be
        # false by the time it is reached.
        self.get_cache_hierarchy().membus.mem_side_ports = (
            self.remote_link.cpu_side_ports
        )

        # Incorporate the cache hierarchy for the motherboard.
        if self.get_cache_hierarchy():
            self.get_cache_hierarchy().incorporate_cache(self)

        # Incorporate the processor into the motherboard.
        self.get_processor().incorporate_processor(self)

        self._connect_things_called = True

    @overrides(RiscvBoard)
    def get_default_kernel_args(self) -> List[str]:
        """Return the default kernel command line.

        The init program is `/root/gem5-init.sh`. This class previously
        defined this method twice; only this (later) definition ever took
        effect, so it is the one that is kept.
        """
        return [
            "console=ttyS0",
            "root={root_value}",
            "rw",
            "init=/root/gem5-init.sh",
        ]

    @overrides(AbstractBoard)
    def _post_instantiate(self):
        """Called to set up anything needed after m5.instantiate"""
        print(
            "__ranges__", self.get_remote_memory().physical_address_ranges[0]
        )
        self.get_processor()._post_instantiate()
        if self.get_cache_hierarchy():
            self.get_cache_hierarchy()._post_instantiate()
        self.get_local_memory()._post_instantiate()
        # The remote side is an OutgoingRequestBridge rather than a memory
        # system, so no `_post_instantiate` is called on it.
7.01689 6.978944]\n", + " [6.008993 5.932203 7.050845 6.956073]\n", + " [1.881847 1.874148 2.094582 2.086612]]\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWoAAAEICAYAAAB25L6yAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAYGElEQVR4nO3df3RV5Z3v8ffHAMaiFYVchxIxuVoVf6KNFiu1nYo/iz+Hi+CPltY2HatdVudeq6OzBq/t7b0zVtuqvV1UrdpWQCuIo60/WnWqtxUMioIGf0xlagBrCCiCIgrf+8fZiUk4IeeQc5In4fNai0XO3s9++O69wufs85znPEcRgZmZpWuHvi7AzMy2zkFtZpY4B7WZWeIc1GZmiXNQm5klzkFtZpY4B7WZWeIc1JYMSRdJapD0vqTbCmhfLelXklokrZe0QNLETm0ek9Qsaa2k5ySd1mn/SEm3SFop6R1JSyVdLWlotj+yvtdJWi7pOkkV7fbt06m/6ZJ+2eOLYdaOg9pSsgL4LnBrdw0l7Q48CWwEDgRGANcDd0qa1K7pxcDIiPg4UA/8UtLIdn38CdgJOCoidgGOA4YBe7fr49CI2Bk4Fjgb+HoPztGsaIP6ugCzVhExB0BSHVDdTfNLgHXA+RGxOds2U9Jo4AeS7omc59v/E8BgYE9gJXAp8A5wbmsfEfE6uXDPV99SSU8AB23TCZptI99RW391HHBPu5BudRcwGti3dYOk+yVtAOYDjwMN2a4JwJw8feQl6QDgs8CzPSvdrDi+o7b+agS5u+LOVrbb/xJAREyUNJhcMI9pF8zDu+ijs2ckbQJWAzcDP+9J4WbFclBbf7UKGJln+8h2+9tExAfAbyVdLOnViLgPaOmij84Oj4hX82zfRG4opb3BwAcF9GlWMA99WH/1O+BMSZ1/hycDrwMvd3HcID56o/B3wBl5+ijUX4CaTttqgf/cxv7M8nJQWzIkDZJUCVQAFZIqJXX1qu96YFfgFkl/k7WdClwJ/I+ICEn7SzpJ0k6SBks6FzgG+Pesj+uAjwO3S9orq2FUNgXvkAJKng1clU0T3EHSBOAU4Nfbeg3M8nFQW0quAt4DLgfOzX6+Kl/DiGgBxgOVwIvkhjEuBc6LiNlZMwHTgTeBZnKzOc6KiGeyPlYDnyE3VDFf0jvA74G3gXxDHZ39T+CP5KYJrgH+BTgnIpYUc9Jm3ZG/OMDMLG2+ozYzS5yD2swscQ5qM7PEOajNzBJXlg+8jBgxImpqasrRtZnZgLRw4cJVEVGVb19ZgrqmpoaGhobuG5qZGQCSuvyglIc+zMwS56A2M0ucg9rMLHFePc/MtuqDDz6gqamJDRs29HUpA0JlZSXV1dUMHtx54cWuOajNbKuamprYZZddqKmpQVJfl9OvRQQtLS00NTVRW1tb8HEe+jCzrdqwYQPDhw93SJeAJIYPH170qxMHtZl1yyFdOttyLbsNakn7SVrU7s9aSd/elgLNzKx43Y5RR8RLwFgASRXAcmBuecsys1TVXP5ASftb9r+/2G0bSVx66aX84Ac/AODaa69l3bp1TJ8+nWnTpjFx4kQmTZrU1n7nnXdm3bp1LFu2jNraWq688kq++93vArBq1SpGjhzJN77xDW688ca2Y8aOHcv+++/PrFmzuqzj+9//PrfccgsVFRX8+Mc/5oQTTtjW0y5KsW8mHgv8R0T4q4ZsYJu+a4n7e7u0/W1ndtxxR+bMmcMVV1zBiBEjijq2traWBx54oC2o7777bg488MAObRobG9m0aRNPPPEE69evZ+jQoVv08+KLLzJr1ixeeOEFVqxYwYQJE3j55ZepqKjY9hMrULFBP
QWYmW+HpHqgHmD06NE9LMtsYCn1XSgUdic6UAwaNIj6+nquv/56vve97xV17Mc+9jHGjBlDQ0MDdXV1zJ49m8mTJ7NixYq2NjNnzuS8886jsbGRefPmcfbZZ2/Rz7x585gyZQo77rgjtbW17LPPPixYsICjjjqqx+fXnYKDWtIQ4FTginz7I2IGMAOgrq7OXxtjZoVZ8Wz3bWIzF545njETzuHkc+pZ+dZ7vPvuBp5veos16zfyny3reb7prbbmmwOeb3qL5SvXsuGDTYybMJEbfnY752yu5L0Pg02Vw1i17rW29rNnz+aRRx5h6dKl3HDDDXmDevny5YwbN67tcXV1NcuXL+/RqReqmFkfJwHPRMRfy1WMmVlXPr7Lzkz8uynceeuMjjvyzKLoPLPi6M9P4KknHuPB++ZwwilndNjX0NDAiBEjGD16NMceeyzPPvssq1evLnn9PVFMUE+li2EPM7PecO75F3Dv7F/w3nvvtm0bttvurH37rbbHb69Zw7Ddd+9w3OAhQxhz8FjumHETx518Wod9M2fOZOnSpdTU1LD33nuzdu1a7rnnHubOncvYsWMZO3YsDQ0NjBo1itdff73tuKamJkaNGlWeE+2koKCWNBQ4DphT3nLMzLq26267cfzE05k76xdt2+qOOpqH/m0uH2zcCMC8u+/kiKM+u8WxX6q/kG9fMZ1dd9utbdvmzZu56667WLx4McuWLWPZsmXMmzePmTNncsYZZ7Bo0SIWLVpEXV0dp556KrNmzeL999/ntdde45VXXuHII48s/0lT4Bh1RKwHhpe5FjPrBwp+E7OQsedt8KX6i5h1281tjz834UQaFz/HlJM/T0VFBdV71XLV96/b4rh99hvDPvuN6bDtiSeeYNSoUXziE59o23bMMcfw4osvsnLlSkaOHNm2/cADD2Ty5MkccMABDBo0iJtuuqlXZnwAKKL07/vV1dWFvzjA+rUST8+r2XBnSfuD3pv10djYyJgxY7pv2FkZgvr5zYWvj1GIQ6qHlbS/QuW7ppIWRkRdvvb+CLmZWeIc1GZmiXNQm5klzkFtZpY4B7WZWeIc1GZmifNXcZlZcUq9smD949020ajDubT+XL78TzcAcPtPb+Ddd9dzwaWX80+XfJNjJpzAcV/86BOH4/ar5qmXmlj++l84+TOH8vVv/QMXXXYVAGtWtzDhU/vzd+dMY9ZtP2s7plTLnF555ZXccccdrFmzhnXr1hV6FbbKd9RmlrwddxzCnN8+yprVLUUfO2r0Xvzh0YfbHj9y/73sve/+Hdp0XuY0n/bLnD744IN885vfZNOmTVu0O+WUU1iwYEHRdW6Ng9rMkjeoooL6c87klz/7SdHHVlbuxH/dZ19eeC73AZyH/m0ux088vUOb1mVOjz/+eObNm5e3n66WOe1s3LhxHT7RWAoOajPrFy6cNpnf3Hs376wt/ksYTjz1TB68bw5vrGhih4oKqvboGKSzZ89mypQpTJ06lZkz8689t3z5cvbcc8+2x6kuc2pm1me8zKmZWT/gZU7NzBKX6jKnxx57bFmHQTw9z8yKU+gX9W4ny5xu3ryZV199ld2zu/jLLruMO++8k3fffZfq6mq+9rWvMX369B6d84Bf5rQvvtreBgAvc9rGy5xu3ZIlS7j11lu57rotnxy64mVOzcx60UEHHVRUSG8LB7WZWeIc1GZmiXNQm5klrtBvIR8m6deSlkpqlHRUuQszM7OcQqfn/Qh4MCImSRoCfKyMNZmZWTvdBrWkXYFjgGkAEbER2FjesswsVQfffnBJ+1t83B3dtklhmdOWlhYmTZrE008/zbRp07jxxht7ctpFKeSOuhZoBn4u6VBgIXBxRORfC7CnSr3WLaWfv2pmvat1mdNTL2xht92HF3Vs6zKnrUFdyDKnQ4cO3aKfyspKrrnmGpYsWcKSJUu2/WS2QSFj1IOAw4H/GxGHAeuByzs3klQvqUFSQ3Nzc4nLNLPtWQrLnA4dOpTx4
8dTWVlZdA09VUhQNwFNETE/e/xrcsHdQUTMiIi6iKirqqoqZY1mZn2+zGlf6nboIyLekPS6pP0i4iXgWODF8pdm26zUw0eFru1gVkbtlzntcFdb4DKnN137vxheVbXVZU5HjRrFV7/6VVavXt22dkcKCp318S3gV9mMjz8DXylfSZaaUq+XAl4zxbbNuedfwJSTP8dpk89p21bsMqdzf/8Ujz/y27Z97Zc5BdqWOR0xYgRXX301ADfffDN1dXmX4egVBQV1RCwC+q5KMzM6LnN6+lnnArllTn91y085ddJUBg8ZstVlTuvGHd3lMqetK+g99thjXHPNNTz66KOcccYZW/TTF7zMqZkVZfGXFxfWcIAtcwpQU1PD2rVr2bhxI/feey8PP/wwBxxwQInPcEsOajNL3rpX/l/bz8Or/gvzX1nRYf/fX/Id/v6S72xx3Kg9RzPn93/aYvtpk8/mtMlnc0j1MJ566qkO+yoqKnjjjTfy1rFs2bJtqL7nvNaHmVniHNRmZolzUJtZt8rxTVDbq225lg5qM9uqyspKWlpaHNYlEBG0tLQU/elGv5loZltVXV1NU1MTRS8N8dabJa/lryV+smh8Z6eS9leIyspKqqurizrGQW1mWzV48GBqa7fhS2Wnjyt5LSeV+EuC+8sHrzz0YWaWOAe1mVniHNRmZolzUJuZJc5BbWaWOAe1mVniHNRmZolzUJuZJc5BbWaWOAe1mVniHNRmZolzUJuZJc5BbWaWuIJWz5O0DHgH2AR8GBH+RnIzs15SzDKnfxsRq8pWiZmZ5eWhDzOzxBUa1AE8LGmhpPp8DSTVS2qQ1FD0N0GYmVmXCg3q8RFxOHAScKGkYzo3iIgZEVEXEXVVVVUlLdLMbHtWUFBHxPLs7zeBucCR5SzKzMw+0m1QSxoqaZfWn4HjgSXlLszMzHIKmfWxBzBXUmv7OyPiwbJWZWZmbboN6oj4M3BoL9RiZmZ5eHqemVniHNRmZolzUJuZJc5BbWaWOAe1mVniHNRmZolzUJuZJc5BbWaWOAe1mVniHNRmZolzUJuZJc5BbWaWOAe1mVniHNRmZolzUJuZJc5BbWaWOAe1mVniHNRmZolzUJuZJc5BbWaWuIKDWlKFpGcl3V/OgszMrKNi7qgvBhrLVYiZmeVXUFBLqga+CNxc3nLMzKyzQu+ofwhcBmzuqoGkekkNkhqam5tLUZuZmVFAUEuaCLwZEQu31i4iZkREXUTUVVVVlaxAM7PtXSF31EcDp0paBswCviDpl2WtyszM2nQb1BFxRURUR0QNMAV4NCLOLXtlZmYGeB61mVnyBhXTOCIeBx4vSyVmZpaX76jNzBLnoDYzS5yD2swscQ5qM7PEOajNzBLnoDYzS5yD2swscQ5qM7PEOajNzBLnoDYzS5yD2swscQ5qM7PEOajNzBLnoDYzS5yD2swscQ5qM7PEOajNzBLnoDYzS5yD2swscQ5qM7PEOajNzBLXbVBLqpS0QNJzkl6QdHVvFGZmZjmDCmjzPvCFiFgnaTDwpKTfRsRTZa7NzMwoIKgjIoB12cPB2Z8oZ1FmZvaRgsaoJVVIWgS8CTwSEfPztKmX1CCpobm5ucRlmpltvwoK6ojYFBFjgWrgSEkH5WkzIyLqIqKuqqqqxGWamW2/ipr1ERFvAY8BJ5alGjMz20Ihsz6qJA3Lft4JOA5YWua6zMwsU8isj5HA7ZIqyAX7XRFxf3nLMjOzVoXM+ngeOKwXajEzszz8yUQzs8Q5qM3MEuegNjNLnIPazCxxDmozs8Q5qM3MEuegNjNLnIPazCxxDmozs8Q5qM3MEuegNjNLnIPazCxxDmozs8Q5qM3MEuegNjNLnIPazCxxDmozs8Q5qM3MEuegNjNLnIPazCxx3X65raQ9gTuAPYAAZkTEj8pdmJlt3cG3H1zS/hZ/eXFJ+7PS6TaogQ+Bf4iIZyTtAiyU9EhEvFjm2szMyqq/PNl1O/QRESsj4pns53eAR
mBUWaoxM7MtFDVGLakGOAyYn2dfvaQGSQ3Nzc0lKs/MzAoOakk7A/cA346ItZ33R8SMiKiLiLqqqqpS1mhmtl0rKKglDSYX0r+KiDnlLcnMzNrrNqglCbgFaIyI68pfkpmZtVfIrI+jgfOAxZIWZdv+MSJ+U7aqElbqd4lh+5wW1V/ebTdLQbdBHRFPAuqFWszMLA9/MtHMLHEOajOzxDmozcwS56A2M0ucg9rMLHEOajOzxDmozcwS56A2M0ucg9rMLHEOajOzxDmozcwS56A2M0ucg9rMLHEOajOzxDmozcwS56A2M0ucg9rMLHEOajOzxDmozcwS56A2M0tct0Et6VZJb0pa0hsFmZlZR4XcUd8GnFjmOszMrAvdBnVE/AFY3Qu1mJlZHiUbo5ZUL6lBUkNzc3OpujUz2+6VLKgjYkZE1EVEXVVVVam6NTPb7nnWh5lZ4hzUZmaJK2R63kzgT8B+kpoknV/+sszMrNWg7hpExNTeKMTMzPLz0IeZWeIc1GZmiXNQm5klzkFtZpY4B7WZWeIc1GZmiXNQm5klzkFtZpY4B7WZWeIc1GZmiXNQm5klzkFtZpY4B7WZWeIc1GZmiXNQm5klzkFtZpY4B7WZWeIc1GZmiXNQm5klzkFtZpY4B7WZWeIKCmpJJ0p6SdKrki4vd1FmZvaRboNaUgVwE3AScAAwVdIB5S7MzMxyCrmjPhJ4NSL+HBEbgVnAaeUty8zMWikitt5AmgScGBFfyx6fB3w6Ii7q1K4eqM8e7ge8VPpyy2oEsKqvi+jnfA1Lw9ex5/rjNdwrIqry7RhUqn8hImYAM0rVX2+T1BARdX1dR3/ma1gavo49N9CuYSFDH8uBPds9rs62mZlZLygkqJ8GPimpVtIQYApwX3nLMjOzVt0OfUTEh5IuAh4CKoBbI+KFslfW+/rtsE1CfA1Lw9ex5wbUNez2zUQzM+tb/mSimVniHNRmZokb0EEt6W8kzZL0H5IWSvqNpH37uq7+RNKVkl6Q9LykRZI+XeTxNZKWlKu+/kjS6ZJC0v5d7H9c0hZTyyRNk3Rj+StMj6Th2e/fIklvSFre7vGQrM2pxS5xIWmZpBHlqbp0SjaPOjWSBMwFbo+IKdm2Q4E9gJf7srb+QtJRwETg8Ih4P/uFHtLHZQ0EU4Ens7//uY9r6RciogUYCyBpOrAuIq5t3S9pUETcxwCdkTaQ76j/FvggIn7auiEingOelPSvkpZIWizpLABJn5f0B0kPZAtQ/VTSDpK+KumHrX1I+rqk63v9bPrGSGBVRLwPEBGrImKFpCMk/VHSc5IWSNolu3N+QtIz2Z/PdO5MUkV27Z/O7tC/0etn1Mck7QyMB84nN9UVSTtlr/waJc0FdmrX/iuSXpa0ADi6T4pOlKTbsv+n84F/af+KQ9IpkuZLelbS7yTtkW0fLunh7FXizYD68hwKNZCD+iBgYZ7tZ5J7Zj4UmAD8q6SR2b4jgW+RW3xq76ztXcApkgZnbb4C3Fq+spPyMLBnFhQ/kfS57GXmbODiiGi9hu8BbwLHRcThwFnAj/P0dz7wdkQcARwBfF1Sba+cSTpOAx6MiJeBFkmfAi4A3o2IMeTusD8FkP1eXk0uoMeT+720jqqBz0TEpZ22PwmMi4jDyK1PdFm2/Z+BJyPiQHKvuEf3WqU9MGCHPrZiPDAzIjYBf5X07+RCYy2wICL+DCBpJjA+In4t6VFgoqRGYHBELO6r4ntTRKzLguSz5F6hzAa+B6yMiKezNmsBJA0FbpQ0FtgE5Hsv4HjgkGz9GIBdgU8Cr5XzPBIzFfhR9vOs7PE+ZE9sEfG8pOez/Z8GHo+IZgBJs8l/Xbdnd2f/lzurBmZnT3ZD+Oh37BhyN2BExAOS1vROmT0zkIP6BWBSt6066jypvPXxzcA/AkuBn/ewrn4l+0/wOPC4pMXAhV00vQT4K7lXKjsAG/K0EfCtiHioDKUmT9LuwBeAgyUFuQ+QBfBsnxbWv63vY
vsNwHURcZ+kzwPTe6ugchjIQx+PAjtmq/oBIOkQ4C3grGy8tIrcM+yCrMmR2UfldyD38v1JgIiYT269k7OBmb13Cn1L0n6SPtlu01igERgp6YiszS6SBpG7O14ZEZuB88iFUGcPARe0DiNJ2je7E99eTAJ+ERF7RURNROxJ7k5vIbnfLSQdBByStZ8PfC4bVx0M/Le+KLqf2pWP1iT6crvtf+Cja30SsFsv17VNBuwddUSEpDOAH0r6Drk7vGXAt4GdgefI3c1cFhFvZFOlngZuJPdS9DFyY1it7gLGRkS/eKlUIjsDN0gaBnwIvEpuKdufZ9t3Ijc+PQH4CXCPpC8BD5L/TudmoAZ4JpuV0wycXt5TSMpU4P902nYPcBiwUza01kj23kpErMxmOPyJ3A3Got4qdACYDtydDW08CrS+F3I1MFPSC8Afgb/0TXnF8UfIM9nLo/8eERO72H8/cH1E/L436zIzG8hDHyUhaZikl4H3HNJm1hd8R21mljjfUZuZJc5BbWaWOAe1mVniHNRmZolzUJuZJe7/A8g7kct7rA82AAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# O3 Results\n", + "\n", + "print(\"Results \", plot_idx)\n", + "plot_idx += 1\n", + "\n", + "plt.title(\"1 O3CPU\")\n", + "f = open(home_path + \"/m5out_node_O3_0/system.platform.terminal\", \"r\")\n", + "exp = 0\n", + "kernel = 0\n", + "for lines in f.read().split(\"\\n\"):\n", + " if \"Bandwidth\" in lines:\n", + " data[exp][kernel] = float(lines.split(\" \")[1])\n", + " kernel += 1\n", + " if kernel % 4 == 0:\n", + " exp += 1\n", + " kernel = 0\n", + "\n", + "print([i for i in range(4)], data)\n", + "\n", + "x_axis = np.arange(len(kernels))\n", + "plt.xticks(x_axis, kernels)\n", + "\n", + "plt.bar(x_axis, data[0], bar_width, label=\"NUMA-0\")\n", + "plt.bar(x_axis - bar_width, data[1], bar_width, label=\"NUMA-0,1\")\n", + "plt.bar(x_axis + bar_width, data[2], bar_width, label=\"NUMA-1\")\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Results 13\n", + "[0, 1, 2, 3] [[10.324512 10.065496 11.409131 11.371691]\n", + " [10.31722 10.129058 11.51054 11.468607]\n", + " [ 7.576464 7.489263 8.33215 8.295167]]\n" + ] + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXAAAAEICAYAAABGaK+TAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAXt0lEQVR4nO3de5RU5Z3u8e9jAzZeRlR6HKQ13eMVvAST1uBljCfgJQY1RoaAxkhuJDF6Ep1ZXg4zR3KSrGROjGYUc7KIGjWJDaIiJs4ojsZRE0UbQZCLl4mMNhdtWhAREYXf+aM22DQNXV21+/LC81mrF1273v3Wr/Zqnnrrrb3fUkRgZmbp2aW7CzAzs9I4wM3MEuUANzNLlAPczCxRDnAzs0Q5wM3MEuUANzNLlAPckiHpEEnrJP22nXaDJd0v6W1J70j6o6QTWtzfX9KfJDVLWiXpKUknturjUElTJa3I+pkr6XJJFZJqJIWkNdnPYklXZfttuq9Xq/5uk/TDPI+HmQPcUnIT8Oz2Gkg6CPgTMA+oBfYHpgEzJB2fNVsDfBWoAvYG/gX4/abQzfqYCbwOHBURewF/D9QBe7Z4uH4RsQcwBvjfks7I40maFcsBbkmQNBpYBTzSTtMJwFMRMT4i3oqIdyLiBuA3FIKaiFgXES9GxEZAwAYKQb5P1sf3gT9HxOURsSzb58WIOD8iVrV+wIh4CpgPHFnm0zTrEAe49XiS/gr4P8DlRTQ/FZjaxva7gBMl9W3R71xgHXA/cHNEvJndNRy4u8jalE2/HAHMLmYfs7z0ar+JWbf7AXBLRDRKaq9tf2BZG9uXURiw7AMsAYiIoyVVAucCfVq03XcbfbS2AghgOXBVRDwiqaaI/cxy4QC3Hk3SEAoj4mOK3GUFMKCN7QOAjcDKlhsjYh1QL2mhpDkR8TzQvI0+WusfER+22rbpdu8Wv2+6/UERfZoVzVMo1tOdAtQAr0laDvwjcJ6k57bR/j8ofODY2igKc+Nrt7Ffb+BvW/RxXon1LqMQ1DWtttcC/11in2ZtcoBbTzcJOAgYkv38EngAOH0b7b8PnCDpR5L2kbSnpEuBLwNXAkgaKukkSX0k9ZV0JbAfhTNPAK7J+vippL/J9jlY0m8l9dtesRGxAbgH+JGkfSX1ljQGGAz8e4nHwKxNDnDr0SJibUQs3/RD4RTAdRHRtI32LwMnAR8HFlMYEZ8HnB4Rf8qa7UrhlMRmCvPhZwKfi4ilWR//BRxPYRQ9X9LbFEK5AXiniLIvBt4C5gJvApdk/b/RsWdvtn3yFzqYmaXJI3Azs0Q5wM3MEuUANzNLVLsBLulWSW9KeqHFtp9KWpQt8DOtvU/mzcwsf+1+iCnpZAqf/N8REUdm204DHo2IDyVtWl/iyvYerH///lFTU1N20WZmO5NZs2atiIiq1tvbvRIzIh5vfXlwRMxocfNpYGQxRdTU1NDQ0FBMUzMzy0hq8yKwPObAv8p2LlCQNE5Sg6SGpqY2T901M7MSlBXgksZTWO/hd9tqExGTIqIuIuqqqrZ6B2BmZiUqeTErSWOBEcCw8NVAZmZdrqQAz7555Arg09tZHMjMdmAffPABjY2NrFu3rrtL2WFUVlZSXV1N7969i2rfboBLqqewIlx/SY0UFvq5msJ6Eg9n6zM/HRHfKrVoM0tPY2Mje+65JzU1NRSxTru1IyJobm6msbGR2traovYp5iyUMW1svqWjxZnZjmXdunUO7xxJYt9996UjJ3v4SkwzK5nDO18dPZ4OcDOzRPkr1cwsFzVXPZBrf4t/8rl220ji8ssv52c/+xkA1157LWvWrGHChAmMHTuWESNGMHLkR9cZ7rHHHqxZs4bFixdTW1vL+PHj+eEPfwjAihUrGDBgAN/85jeZOHHi5n2GDBnC4YcfzuTJk7dZx49//GNuueUWKioquOGGGzj99G1930i+HOBmHTFhr07o8+38+9wRLJ3dbpNdd+3DvVMnc/XVV9O/f/8OdV9
bW8sDDzywOcCnTp3KEUccsUWbhQsXsmHDBp544gneffdddt999636WbBgAZMnT2b+/PksXbqU4cOH89JLL1FRUdGhekrhKRQzS1avigrGXfAFrr/++g7vu9tuuzFo0KDNy3tMmTKFUaNGbdGmvr6eCy+8kNNOO43p06e32c/06dMZPXo0u+66K7W1tRx88ME888wzHX8yJfAI3KybdcfUw47kO2NHMWj4BZx5wTiWrXqPtWvXMbdxFSvfXc9/N7/L3MZVm9tuDJjbuIoly1az7oMNDB0+ght/dTsXbKzkvQ+DDZX9WLHm1c3tp0yZwsMPP8yiRYu48cYbOf/887d6/CVLljB06NDNt6urq1myZEmnPudNPAI3s6T91Z57MOK80dx566Qt72jjjI7WZ3mceMpwnn7ijzx4/72cfta5W9zX0NBA//79OfDAAxk2bBizZ8/mrbfeyr3+cjjAzSx5X/rat7lvym94772PLgzvt/c+rH571ebbb69cSb999tliv959+jDoqCHcMekmTj3znC3uq6+vZ9GiRdTU1HDQQQexevVq7rnnHqZNm8aQIUMYMmQIDQ0NDBw4kNdff33zfo2NjQwcOLBznmgrDnAzS95ee+/NaSM+z7TJv9m8re74E3no99P4YP16AKZPvZNjj/+7rfb98rjv8L2rJ7DX3ntv3rZx40buuusu5s2bx+LFi1m8eDHTp0+nvr6ec889lzlz5jBnzhzq6uo4++yzmTx5Mu+//z6vvvoqL7/8Mscdd1znP2k8B25mOenQ3HsRZ5h01JfHXcLk227efPvTw89g4bznGX3mKVRUVFD9sVr+6cfXbbXfwYcN4uDDBm2x7YknnmDgwIHsv//+m7edfPLJLFiwgGXLljFgwIDN24844ghGjRrF4MGD6dWrFzfddFOXnIECRXwjT57q6urCX+hgSeuE0whr1t2Za39d9SHmwoULGTRoUPsN25JzgM/dWNzaIcU6urpfrv11RFvHVdKsiKhr3dZTKGZmiXKAm5klygFuZpYoB7iZWaIc4GZmiXKAm5klyueBm1k+8j7Fctxj7TbRwE9w+bgvcdE/3wjA7b+8kbVr3+Xbl1/FP192MScPP51TP/fRFZZDD6vm6RcbWfL6a5x5wsf5xqX/wCVX/BMAK99qZvgnD+e8C8Yy+bZfbd4nr+Vkx48fzx133MHKlStZs2ZNsUdhuzwCN7Nk7bprH+7990dZ+VZzh/cdeODHePzRGZtvP/yH+zjo0MO3aNN6Odm2tFxO9sEHH+Tiiy9mw4YNW7U766yzcl+l0AFuZsnatJzsb3/1iw7vW1nZl789+FDmP1+4qOih30/jtBGf36JNnsvJDh06dIsrOPPgADezpH1n7Cj+7b6pvLO641+MccbZX+DB++9l+dJGdqmooGq/LQN2ypQpjB49mjFjxlBfX99mH0uWLOGAAw7YfNvLyZqZFcnLyZqZJczLyZqZJaqnLic7bNiwTp1OSec0wpxPUcp7BTjY+b7KymwLHfly5p1gOdmNGzfyyiuvsE826r/iiiu48847Wbt2LdXV1Xz9619nwoQJZT3ndJaTdYBbT+DlZDfzcrLb98ILL3Drrbdy3XVbv2hsj5eTNTPrZkceeWSHw7uj2g1wSbdKelPSCy227SPpYUkvZ//uvb0+zMwsf8XMgd8GTATuaLHtKuCRiPiJpKuy21fmX57lqhPe/ndo3tPMctVugEfE45JqWm0+Bzgl+/124DEc4DulmqseyLU/f45gVrxS58D3i4hl2e/Lgf221VDSOEkNkhqamppKfDgzM2ut7A8xo3AayzZPZYmISRFRFxF1VVVV5T6cmZllSj0P/A1JAyJimaQBwJt5FmVm6Tnq9qNy7W/eqXe026YnLCfb3NzMyJEjefbZZxk7diwTJ04s52l3SKkj8PuBi7LfLwLaXqbLzKwT9YTlZCsrK/nBD37Atdde2+EaylXMaYT1wFPAYZIaJX0N+AlwqqSXgeH
ZbTOzLtUTlpPdfffdOemkk6isrOxwDeVqN8AjYkxEDIiI3hFRHRG3RERzRAyLiEMiYnhE9Kwlusxsp9Hdy8l2J1+JaWZJ83KyZmYJ687lZLuTA9zMktedy8l2p3SWkzWzHm3eRfOKb7wDLScLUFNTw+rVq1m/fj333XcfM2bMYPDgwTk/w605wM0sWWte/tPm3/et+mtmvrx0i/u/ddmVfOuyrVf5GHjAgdz7yFNbbT9n1PmcM+p8jq7ux9NPP73FfRUVFSxfvrzNOhYvXlxC9eXzFIqZWaIc4GZmiXKAm1nJuvIbvXYGHT2eDnAzK0llZSXNzc0O8ZxEBM3NzR26otMfYppZSaqrq2lsbKSkZaJX5bv+3Rs5v4gsfKdvrv0Vq7Kykurq6qLbO8DNrCS9e/emtrbELxOeMDTXWj6b6BdDl8tTKGZmiXKAm5klygFuZpYoB7iZWaIc4GZmiXKAm5klygFuZpYoB7iZWaIc4GZmiXKAm5klygFuZpYoB7iZWaIc4GZmifJqhGY7mKNuPyrX/jr0ZcXWpRzgZmat5P0iCJ3zQugpFDOzRJUV4JIukzRf0guS6iUV/11AZmZWlpIDXNJA4H8CdRFxJFABjM6rMDMz275yp1B6AX0l9QJ2A5aWX5KZmRWj5ACPiCXAtcBrwDLg7YiY0bqdpHGSGiQ1lPTlp2Zm1qZyplD2Bs4BaoH9gd0lfal1u4iYFBF1EVFXVVVVeqVmZraFcqZQhgOvRkRTRHwA3AuckE9ZZmbWnnIC/DVgqKTdJAkYBizMpywzM2tPOXPgM4G7geeAeVlfk3Kqy8zM2lHWlZgRcQ1wTU61mJlZB/hKTDOzRHktlBx5ESEz60oegZuZJcojcOtR/C7GrHgegZuZJcoBbmaWKAe4mVmiHOBmZolygJuZJcoBbmaWKAe4mVmiHOBmZolygJuZJcoBbmaWKAe4mVmiHOBmZolygJuZJcoBbmaWKAe4mVmiHOBmZolygJuZJcoBbmaWKAe4mVmiHOBmZolygJuZJcoBbmaWKAe4mVmiygpwSf0k3S1pkaSFko7PqzAzM9u+XmXu/6/AgxExUlIfYLccajIzsyKUHOCS9gJOBsYCRMR6YH0+ZZmZWXvKmUKpBZqAX0uaLelmSbu3biRpnKQGSQ1NTU1lPJyZmbVUToD3Aj4B/L+IOAZ4F7iqdaOImBQRdRFRV1VVVcbDmZlZS+UEeCPQGBEzs9t3Uwh0MzPrAiUHeEQsB16XdFi2aRiwIJeqzMysXeWehXIp8LvsDJS/AF8pvyQzMytGWQEeEXOAunxKMTOzjvCVmGZmiXKAm5klygFuZpYoB7iZWaIc4GZmiXKAm5klygFuZpYoB7iZWaIc4GZmiXKAm5klygFuZpYoB7iZWaIc4GZmiXKAm5klygFuZpYoB7iZWaIc4GZmiXKAm5klygFuZpYoB7iZWaIc4GZmiXKAm5klygFuZpYoB7iZWaIc4GZmiXKAm5klygFuZpaosgNcUoWk2ZL+kEdBZmZWnDxG4N8FFubQj5mZdUBZAS6pGvgccHM+5ZiZWbHKHYH/HLgC2LitBpLGSWqQ1NDU1FTmw5mZ2SYlB7ikEcCbETFre+0iYlJE1EVEXVVVVakPZ2ZmrZQzAj8ROFvSYmAy8BlJv82lKjMza1fJAR4RV0dEdUTUAKOBRyPiS7lVZmZm2+XzwM3MEtUrj04i4jHgsTz6MjOz4ngEbmaWKAe4mVmiHOBmZolygJuZJcoBbmaWKAe4mVmiHOBmZolygJuZJcoBbmaWKAe4mVmiHOBmZolygJuZJcoBbmaWKAe4mVmiHOBmZolygJuZJcoBbmaWKAe4mVmiHOBmZolygJuZJcoBbmaWKAe4mVmiHOBmZolygJuZJcoBbmaWKAe4mVmiHOBmZokqOcAlHSDpj5IWSJov6bt5FmZmZtvXq4x9PwT+ISKek7QnMEvSwxGxIKfazMxsO0oegUfEsoh4Lvv9HWAhMDC
vwszMbPtymQOXVAMcA8xs475xkhokNTQ1NeXxcGZmRg4BLmkP4B7gexGxuvX9ETEpIuoioq6qqqrchzMzs0xZAS6pN4Xw/l1E3JtPSWZmVoxyzkIRcAuwMCKuy68kMzMrRjkj8BOBC4HPSJqT/ZyZU11mZtaOkk8jjIgnAeVYi5mZdYCvxDQzS5QD3MwsUQ5wM7NEOcDNzBLlADczS5QD3MwsUQ5wM7NEOcDNzBLlADczS5QD3MwsUQ5wM7NEOcDNzBLlADczS5QD3MwsUQ5wM7NEOcDNzBLlADczS5QD3MwsUQ5wM7NEOcDNzBLlADczS5QD3MwsUQ5wM7NEOcDNzBLlADczS5QD3MwsUQ5wM7NElRXgks6Q9KKkVyRdlVdRZmbWvpIDXFIFcBPwWWAwMEbS4LwKMzOz7StnBH4c8EpE/CUi1gOTgXPyKcvMzNqjiChtR2kkcEZEfD27fSHwqYi4pFW7ccC47OZhwIull9tt+gMruruIxPkYls/HsHypHsOPRURV6429OvtRI2ISMKmzH6czSWqIiLruriNlPobl8zEs3452DMuZQlkCHNDidnW2zczMukA5Af4scIikWkl9gNHA/fmUZWZm7Sl5CiUiPpR0CfAQUAHcGhHzc6usZ0l6CqiH8DEsn49h+XaoY1jyh5hmZta9fCWmmVmiHOBmZonaaQNc0t9ImizpvyTNkvRvkg7t7rpSIWm8pPmS5kqaI+lTHdy/RtILnVVfiiR9XlJIOnwb9z8maatT4CSNlTSx8yvseSTtm/39zZG0XNKSFrf7ZG3O7uhSH5IWS+rfOVXnp9PPA++JJAmYBtweEaOzbR8H9gNe6s7aUiDpeGAE8ImIeD/7Q+/TzWXtCMYAT2b/XtPNtSQhIpqBIQCSJgBrIuLaTfdL6hUR97ODniG3s47A/wfwQUT8ctOGiHgeeFLSTyW9IGmepC8CSDpF0uOSHsgW7/qlpF0kfVXSzzf1Iekbkq7v8mfT9QYAKyLifYCIWBERSyUdK+nPkp6X9IykPbOR9hOSnst+TmjdmaSK7Lg/m43ov9nlz6ibSdoDOAn4GoVTcpHUN3uXuFDSNKBvi/ZfkfSSpGeAE7ul6B5K0m3Z/9GZwP9t+Q5F0lmSZkqaLek/JO2Xbd9X0ozsXeXNgLrzORRrZw3wI4FZbWz/AoVX848Dw4GfShqQ3XcccCmFhbsOytreBZwlqXfW5ivArZ1Xdo8xAzggC5BfSPp09nZ1CvDdiNh0/N4D3gROjYhPAF8Ebmijv68Bb0fEscCxwDck1XbJM+k5zgEejIiXgGZJnwS+DayNiEEURuSfBMj+Jr9PIbhPovA3aVuqBk6IiMtbbX8SGBoRx1BYv+mKbPs1wJMRcQSFd+cHdlmlZdgpp1C24ySgPiI2AG9I+k8KgbIaeCYi/gIgqR44KSLulvQoMELSQqB3RMzrruK7SkSsyQLm7yi8m5kC/AhYFhHPZm1WA0jaHZgoaQiwAWjrc4bTgKOz9XUA9gIOAV7tzOfRw4wB/jX7fXJ2+2CyF7yImCtpbnb/p4DHIqIJQNIU2j6uO7Op2f/j1qqBKdmLYB8++hs7mcKgjIh4QNLKrimzPDtrgM8HRrbbakutT5jfdPtm4H8Bi4Bfl1lXMrL/HI8Bj0maB3xnG00vA96g8K5mF2BdG20EXBoRD3VCqT2epH2AzwBHSQoKF8YFMLtbC0vbu9vYfiNwXUTcL+kUYEJXFdQZdtYplEeBXbOVEgGQdDSwCvhiNidbReFV+ZmsyXHZsgG7UJgKeBIgImZSWBPmfKC+655C95F0mKRDWmwaAiwEBkg6Nmuzp6ReFEbTyyJiI3AhhXBq7SHg25umoiQdmo3cdxYjgd9ExMcioiYiDqAwMpxF4e8KSUcCR2ftZwKfzuZtewN/3x1FJ2ovPlqz6aIW2x/no2P9WWDvLq6rJDvlCDwiQtK5wM8lXUlhVLgY+B6wB/A8hRHQFRGxPDut61lgIoW3tX+
kME+2yV3AkIhI4m1XDvYAbpTUD/gQeIXCksG/zrb3pTD/PRz4BXCPpC8DD9L2yOhmoAZ4LjtDqAn4fOc+hR5lDPAvrbbdAxwD9M2m5xaSfW4TEcuyMy6eojDomNNVhe4AJgBTsymSR4FNn7V8H6iXNB/4M/Ba95TXMb6UvgjZW61/jIgR27j/D8D1EfFIV9ZlZju3nXUKJReS+kl6CXjP4W1mXc0jcDOzRHkEbmaWKAe4mVmiHOBmZolygJuZJcoBbmaWqP8Pz0bgU31bkeQAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# O3 Results\n", + "\n", + "print(\"Results \", plot_idx)\n", + "plot_idx += 1\n", + "\n", + "plt.title(\"4 O3CPU\")\n", + "f = open(home_path + \"/m5out_node_0/system.platform.terminal\", \"r\")\n", + "exp = 0\n", + "kernel = 0\n", + "for lines in f.read().split(\"\\n\"):\n", + " if \"Bandwidth\" in lines:\n", + " data[exp][kernel] = float(lines.split(\" \")[1])\n", + " kernel += 1\n", + " if kernel % 4 == 0:\n", + " exp += 1\n", + " kernel = 0\n", + "\n", + "print([i for i in range(4)], data)\n", + "\n", + "x_axis = np.arange(len(kernels))\n", + "plt.xticks(x_axis, kernels)\n", + "\n", + "plt.bar(x_axis, data[0], bar_width, label=\"NUMA-0\")\n", + "plt.bar(x_axis - bar_width, data[1], bar_width, label=\"NUMA-0,1\")\n", + "plt.bar(x_axis + bar_width, data[2], bar_width, label=\"NUMA-1\")\n", + "plt.legend()\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/disaggregated_memory_setup/test_board_sst.py b/disaggregated_memory_setup/test_board_sst.py new file mode 100644 index 0000000000..459adfface --- /dev/null +++ b/disaggregated_memory_setup/test_board_sst.py @@ -0,0 +1,234 @@ +# Copyright (c) 2021-2023 The Regents of the University of California +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
from m5.objects import Port, AddrRange, OutgoingRequestBridge
from gem5.resources.resource import AbstractResource
from gem5.components.boards.kernel_disk_workload import KernelDiskWorkload
from gem5.components.boards.abstract_system_board import AbstractSystemBoard
from gem5.components.processors.abstract_processor import AbstractProcessor
from gem5.components.memory.abstract_memory_system import AbstractMemorySystem
from gem5.components.cachehierarchies.abstract_cache_hierarchy import (
    AbstractCacheHierarchy,
)
from gem5.components.processors.abstract_generator import AbstractGenerator
from gem5.components.boards.test_board import TestBoard

from typing import List, Optional, Sequence, Tuple
from gem5.utils.override import overrides


class TestBoardForSST(TestBoard):
    """Test board whose main memory is reached through an outgoing bridge.

    The board always owns a "remote" memory, modelled by an
    `OutgoingRequestBridge`, and may additionally own a "local" memory
    system. When a local memory is present it is mapped from address 0x0 and
    the remote memory is mapped directly after it; otherwise the remote
    memory starts at 0x0.
    """

    def __init__(
        self,
        clk_freq: str,
        generator: AbstractGenerator,
        remote_memory_size: str,
        memory: Optional[AbstractMemorySystem],
        cache_hierarchy: Optional[AbstractCacheHierarchy],
    ):
        """Create the board.

        :param clk_freq: Clock frequency forwarded to `TestBoard`.
        :param generator: Traffic generator used as the board's processor.
        :param remote_memory_size: Size of the remote memory as a string
            (for example "1GiB").
        :param memory: Optional local memory system; `None` means the board
            only has the remote memory.
        :param cache_hierarchy: Optional cache hierarchy; `None` connects the
            generator straight to the remote memory.
        """
        # These private fields are read by the accessors below, which the
        # parent constructor may call, so they are set before super().
        self._localMemory = memory
        self._remoteMemory = OutgoingRequestBridge()
        self._remoteMemorySize = remote_memory_size
        super().__init__(
            clk_freq=clk_freq,
            generator=generator,
            memory=self._localMemory,
            cache_hierarchy=cache_hierarchy,
        )
        self.local_memory = self._localMemory
        self.remote_memory = self._remoteMemory

    @overrides(AbstractSystemBoard)
    def get_memory(self) -> AbstractMemorySystem:
        """Unsupported on this board.

        The board has two memories, so callers must pick one explicitly with
        `get_local_memory` or `get_remote_memory`.

        :raises NotImplementedError: Always.
        """
        raise NotImplementedError

    def get_local_memory(self) -> Optional[AbstractMemorySystem]:
        """Get the local memory (RAM) connected to the board.

        :returns: The local memory system, or `None` if the board was built
            without one.
        """
        return self._localMemory

    def get_remote_memory(self) -> OutgoingRequestBridge:
        """Get the bridge that stands in for the remote memory.

        :returns: The `OutgoingRequestBridge` created in the constructor.
        """
        return self._remoteMemory

    @overrides(AbstractSystemBoard)
    def get_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
        """Get the (range, port) pairs of the local memory.

        :returns: The local memory's ports, or an empty list when the board
            has no local memory.
        """
        local_memory = self.get_local_memory()
        if local_memory is None:
            return []
        return local_memory.get_mem_ports()

    def get_remote_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
        """Get the (range, port) pair of the remote memory bridge.

        :returns: A one-element list holding the bridge's
            `physical_address_ranges` and its `port`.
        """
        remote_memory = self.get_remote_memory()
        return [(remote_memory.physical_address_ranges, remote_memory.port)]

    @overrides(AbstractSystemBoard)
    def _setup_memory_ranges(self):
        """Lay out the local (optional) and remote address ranges.

        `_global_mem_ranges` collects every range on the board, local range
        first when there is one and the remote range always last.
        """
        remote_memory = self.get_remote_memory()
        local_memory = self.get_local_memory()

        self._global_mem_ranges = []
        start_addr_for_remote = 0x0
        if local_memory is not None:
            local_range = AddrRange(start=0x0, size=local_memory.get_size())
            self._global_mem_ranges.append(local_range)
            # The remote memory starts where the local memory ends.
            start_addr_for_remote = local_memory.get_size()
            # Memory systems take a list of ranges, not a single range.
            local_memory.set_memory_range([local_range])

        # `_remoteMemorySize` is a size string, so it is passed as-is rather
        # than called.
        self._global_mem_ranges.append(
            AddrRange(
                start=start_addr_for_remote, size=self._remoteMemorySize
            )
        )
        remote_memory.physical_address_ranges = self._global_mem_ranges[-1]

    @overrides(TestBoard)
    def _connect_things(self) -> None:
        """Connects all the components to the board.

        The order of this board is always:

        1. Connect the memory.
        2. Connect the cache hierarchy.
        3. Connect the processor.

        Developers may build upon this assumption when creating components.

        Notes
        -----

        * The processor is incorporated after the cache hierarchy due to a bug
        noted here: https://gem5.atlassian.net/browse/GEM5-1113. Until this
        bug is fixed, this ordering must be maintained.
        * Once this function is called `_connect_things_called` *must* be set
        to `True`.
        """

        if self._connect_things_called:
            raise Exception(
                "The `_connect_things` function has already been called."
            )

        # Incorporate the (optional) local memory into the motherboard.
        local_memory = self.get_local_memory()
        if local_memory is not None:
            local_memory.incorporate_memory(self)

        cache_hierarchy = self.get_cache_hierarchy()
        if cache_hierarchy:
            # With caches, the remote memory hangs off the hierarchy's
            # membus. This is only reachable when a hierarchy exists.
            self.get_remote_memory().port = (
                cache_hierarchy.membus.mem_side_ports
            )
            # Incorporate the cache hierarchy for the motherboard.
            cache_hierarchy.incorporate_cache(self)

        # Incorporate the processor into the motherboard.
        self.get_processor().incorporate_processor(self)

        self._connect_things_called = True

        if not cache_hierarchy:
            # If we have no caches, then there must be a one-to-one
            # connection between the generator and the remote memory.
            assert len(self.get_processor().get_cores()) == 1
            # `get_remote_mem_ports` is defined on the board, not on the
            # bridge object.
            self.get_processor().get_cores()[0].connect_dcache(
                self.get_remote_mem_ports()[0][1]
            )

    @overrides(TestBoard)
    def _post_instantiate(self):
        """Called to set up anything needed after m5.instantiate"""
        print(
            "__ranges__", self.get_remote_memory().physical_address_ranges[0]
        )
        self.get_processor()._post_instantiate()
        if self.get_cache_hierarchy():
            self.get_cache_hierarchy()._post_instantiate()
        if self.get_local_memory() is not None:
            self.get_local_memory()._post_instantiate()


# ---------------------------------------------------------------------------
# Patch metadata that shared the last physical line of this span; kept here
# so the start of the next file in the patch is not lost.
#
# diff --git a/disaggregated_memory_setup/traffic_gen_sst.py b/disaggregated_memory_setup/traffic_gen_sst.py
# new file mode 100644
# index 0000000000..269ff8c13e
# --- /dev/null
# +++ b/disaggregated_memory_setup/traffic_gen_sst.py
# @@ -0,0 +1,136 @@
# Copyright (c) 2021-2023 The Regents of the University of California
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""
This script is used for running a traffic generator connected to a memory
device. It supports linear and random accesses with a configurable amount
of write traffic.

The board is built without a local memory and without a cache hierarchy; only
a remote memory of `--memory-size` bytes is configured. The generator drives
it with 32GiB/s of traffic for 1ms.
"""

import argparse

# `Root` is needed at the bottom of the script to wrap the board.
from m5.objects import MemorySize, AddrRange, Root

# from gem5.components.boards.test_board import TestBoard

from test_board_sst import TestBoardForSST

from gem5.components.processors.linear_generator import LinearGenerator
from gem5.components.processors.random_generator import RandomGenerator

from gem5.components.memory.hbm import HighBandwidthMemory
from gem5.components.memory.dram_interfaces.hbm import HBM_2000_4H_1x64

from gem5.simulate.simulator import Simulator

# For hooking up SST with this system.
from m5.objects import OutgoingRequestBridge


def generator_factory(
    generator_class: str, rd_perc: int, mem_size: MemorySize
):
    """Build the traffic generator selected by name.

    :param generator_class: Either "LinearGenerator" or "RandomGenerator".
    :param rd_perc: Percentage of read requests; converted with `int()` and
        required to lie in the inclusive range 0-100.
    :param mem_size: Passed to the generator as `max_addr`.
    :returns: A generator configured for 1ms of traffic at 32GiB/s.
    :raises ValueError: If `rd_perc` is out of range or `generator_class` is
        not one of the supported names.
    """
    rd_perc = int(rd_perc)
    if not 0 <= rd_perc <= 100:
        raise ValueError(
            "Read percentage has to be an integer number between 0 and 100."
        )

    # Table lookup instead of an if/elif chain; both generators take the
    # same keyword arguments.
    generator_types = {
        "LinearGenerator": LinearGenerator,
        "RandomGenerator": RandomGenerator,
    }
    try:
        generator_type = generator_types[generator_class]
    except KeyError:
        raise ValueError(
            f"Unknown generator class {generator_class}"
        ) from None

    return generator_type(
        duration="1ms", rate="32GiB/s", max_addr=mem_size, rd_perc=rd_perc
    )


parser = argparse.ArgumentParser(
    description="A traffic generator that can be used to test a gem5 "
    "memory component."
)

parser.add_argument(
    "--generator-class",
    type=str,
    help="The class of generator to use.",
    choices=[
        "LinearGenerator",
        "RandomGenerator",
    ],
    default="LinearGenerator",
)

parser.add_argument(
    "--memory-size", type=str, help="Memory size as a string", default="1GiB"
)

parser.add_argument(
    "--read-percentage",
    type=int,
    help="Percentage of read requests in the generated traffic.",
    default=100,
)


args = parser.parse_args()

memory_size = args.memory_size

# The size string is turned into an integer by building an AddrRange from 0x0
# and slicing its string form from index 2.
# NOTE(review): this presumably relies on the string form being "0:<end>";
# confirm against m5.params.AddrRange.
generator = generator_factory(
    args.generator_class,
    args.read_percentage,
    int(str(AddrRange(0x0, memory_size))[2:]),
)

# We use the SST test board. This is a special board to run traffic
# generation tasks. Can replace the cache_hierarchy with any hierarchy to
# simulate the cache as well as the memory.
board = TestBoardForSST(
    clk_freq="1GHz",  # Ignored for these generators
    generator=generator,  # We pass the traffic generator as the processor.
    remote_memory_size=memory_size,
    memory=None,
    # With no cache hierarchy the test board will directly connect the
    # generator to the memory
    cache_hierarchy=None,
)
board._pre_instantiate()
root = Root(full_system=True, system=board)
# NOTE(review): nothing in this script instantiates or runs the simulation;
# presumably the SST gem5 component does that after loading it. Confirm.


# ---------------------------------------------------------------------------
# Patch metadata that shared the last physical line of this span; kept here
# so the start of the next file in the patch is not lost.
#
# diff --git a/disaggregated_memory_setup/x86_dm_board.py b/disaggregated_memory_setup/x86_dm_board.py
# new file mode 100644
# index 0000000000..db73096ba9
# --- /dev/null
# +++ b/disaggregated_memory_setup/x86_dm_board.py
# @@ -0,0 +1,383 @@
# Copyright (c) 2023 The Regents of the University of California
# All rights reserved.
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# Creating an x86 board that can simulate more than 3 GB memory. 
+ +from m5.objects import ( + AddrRange, + VoltageDomain, + SrcClockDomain, + Terminal, + VncServer, + IOXBar, + BadAddr, + Port, + Pc, + AddrRange, + X86FsLinux, + Addr, + X86SMBiosBiosInformation, + X86IntelMPProcessor, + X86IntelMPIOAPIC, + X86IntelMPBus, + X86IntelMPBusHierarchy, + X86IntelMPIOIntAssignment, + X86E820Entry, + Bridge, + IOXBar, + IdeDisk, + CowDiskImage, + RawDiskImage, + BaseXBar, + Port, + OutgoingRequestBridge, +) + +import os +import m5 +from abc import ABCMeta +from gem5.components.boards.x86_board import X86Board +from gem5.components.boards.abstract_board import AbstractBoard +from gem5.components.processors.abstract_processor import AbstractProcessor +from gem5.components.memory.abstract_memory_system import AbstractMemorySystem +from gem5.components.cachehierarchies.abstract_cache_hierarchy import ( + AbstractCacheHierarchy, +) +from gem5.utils.override import overrides + +from typing import List, Sequence, Tuple + + +class X86DMBoard(X86Board): + __metaclass__ = ABCMeta + + def __init__( + self, + clk_freq: str, + processor: AbstractProcessor, + cache_hierarchy: AbstractCacheHierarchy, + memory: AbstractMemorySystem, + # remote_memory_str: str + # remote_memory: AbstractMemorySystem + remote_memory_size: str, + ) -> None: + self._localMemory = memory + self._remoteMemorySize = remote_memory_size + self._remoteMemory = OutgoingRequestBridge( + physical_address_ranges=AddrRange(0x40000000, 0x80000000) + ) + print(self._remoteMemory.physical_address_ranges[0]) + super().__init__( + clk_freq=clk_freq, + processor=processor, + cache_hierarchy=cache_hierarchy, + memory=memory, + ) + self.local_memory = memory + self.remote_memory = self._remoteMemory + + @overrides(X86Board) + def get_memory(self) -> "AbstractMemory": + """Get the memory (RAM) connected to the board. + + :returns: The memory system. + """ + raise NotImplementedError + + def get_local_memory(self) -> "AbstractMemory": + """Get the memory (RAM) connected to the board. 
+ :returns: The local memory system. + """ + return self._localMemory + + def get_remote_memory(self) -> "AbstractMemory": + """Get the memory (RAM) connected to the board. + :returns: The remote memory system. + """ + # raise Exception("cannot call this method") + return self._remoteMemory + + @overrides(X86Board) + def get_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]: + return self.get_local_memory().get_mem_ports() + + def get_remote_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]: + # return self.get_remote_memory().get_mem_ports() + return [ + ( + self.get_remote_memory().physical_address_ranges, + self.get_remote_memory().port, + ) + ] + + @overrides(X86Board) + def _setup_memory_ranges(self): + # Need to create 2 entries for the memory ranges + # local_memory = self.get_local_memory() + # remote_memory = self.get_local_memory() + + # local_mem_size = local_memory.get_size() + # remote_mem_size = remote_memory.get_size() + + self._local_mem_ranges = [ + "2GiB" + # AddrRange(local_mem_size) + ] + + # The remote memory starts where the local memory ends. Therefore it + # has to be offset by the local memory's size. + # self._remote_mem_ranges = [ + # AddrRange(start=0x100000000, size=remote_mem_size) + # # AddrRange(remote_mem_size) + # ] + # Keep it under 2 GB for this case. Each slice of memory is 1 GB. + + self.mem_ranges = [ + self._local_mem_ranges[0], + # self._remote_mem_ranges[0], + AddrRange(0xC0000000, size=0x100000), # For I/0 + ] + + @overrides(X86Board) + def get_default_kernel_args(self) -> List[str]: + return [ + "earlyprintk=ttyS0", + "console=ttyS0", + "lpj=7999923", + "root={root_value}", + "init=/bin/bash", + ] + + # @overrides(X86Board) + def _setup_io_devicess(self): + """Sets up the x86 IO devices. + + Note: This is mostly copy-paste from prior X86 FS setups. Some of it + may not be documented and there may be bugs. 
+ """ + + # Constants similar to x86_traits.hh + IO_address_space_base = 0x8000000000000000 + pci_config_address_space_base = 0xC000000000000000 + interrupts_address_space_base = 0xA000000000000000 + APIC_range_size = 1 << 12 + + # Setup memory system specific settings. + if self.get_cache_hierarchy().is_ruby(): + self.pc.attachIO(self.get_io_bus(), [self.pc.south_bridge.ide.dma]) + else: + self.bridge = Bridge(delay="50ns") + self.bridge.mem_side_port = self.get_io_bus().cpu_side_ports + self.bridge.cpu_side_port = ( + self.get_cache_hierarchy().get_mem_side_port() + ) + + # # Constants similar to x86_traits.hh + IO_address_space_base = 0x8000000000000000 + pci_config_address_space_base = 0xC000000000000000 + interrupts_address_space_base = 0xA000000000000000 + APIC_range_size = 1 << 12 + + self.bridge.ranges = [ + AddrRange(0xC0000000, 0xFFFF0000), + AddrRange( + IO_address_space_base, interrupts_address_space_base - 1 + ), + AddrRange(pci_config_address_space_base, Addr.max), + ] + + self.apicbridge = Bridge(delay="50ns") + self.apicbridge.cpu_side_port = self.get_io_bus().mem_side_ports + self.apicbridge.mem_side_port = ( + self.get_cache_hierarchy().get_cpu_side_port() + ) + self.apicbridge.ranges = [ + AddrRange( + interrupts_address_space_base, + interrupts_address_space_base + + self.get_processor().get_num_cores() * APIC_range_size + - 1, + ) + ] + self.pc.attachIO(self.get_io_bus()) + + # Add in a Bios information structure. 
+ self.workload.smbios_table.structures = [X86SMBiosBiosInformation()] + + # Set up the Intel MP table + base_entries = [] + ext_entries = [] + for i in range(self.get_processor().get_num_cores()): + bp = X86IntelMPProcessor( + local_apic_id=i, + local_apic_version=0x14, + enable=True, + bootstrap=(i == 0), + ) + base_entries.append(bp) + + io_apic = X86IntelMPIOAPIC( + id=self.get_processor().get_num_cores(), + version=0x11, + enable=True, + address=0xFEC00000, + ) + + self.pc.south_bridge.io_apic.apic_id = io_apic.id + base_entries.append(io_apic) + pci_bus = X86IntelMPBus(bus_id=0, bus_type="PCI ") + base_entries.append(pci_bus) + isa_bus = X86IntelMPBus(bus_id=1, bus_type="ISA ") + base_entries.append(isa_bus) + connect_busses = X86IntelMPBusHierarchy( + bus_id=1, subtractive_decode=True, parent_bus=0 + ) + ext_entries.append(connect_busses) + + pci_dev4_inta = X86IntelMPIOIntAssignment( + interrupt_type="INT", + polarity="ConformPolarity", + trigger="ConformTrigger", + source_bus_id=0, + source_bus_irq=0 + (4 << 2), + dest_io_apic_id=io_apic.id, + dest_io_apic_intin=16, + ) + + base_entries.append(pci_dev4_inta) + + def assignISAInt(irq, apicPin): + + assign_8259_to_apic = X86IntelMPIOIntAssignment( + interrupt_type="ExtInt", + polarity="ConformPolarity", + trigger="ConformTrigger", + source_bus_id=1, + source_bus_irq=irq, + dest_io_apic_id=io_apic.id, + dest_io_apic_intin=0, + ) + base_entries.append(assign_8259_to_apic) + + assign_to_apic = X86IntelMPIOIntAssignment( + interrupt_type="INT", + polarity="ConformPolarity", + trigger="ConformTrigger", + source_bus_id=1, + source_bus_irq=irq, + dest_io_apic_id=io_apic.id, + dest_io_apic_intin=apicPin, + ) + base_entries.append(assign_to_apic) + + assignISAInt(0, 2) + assignISAInt(1, 1) + + for i in range(3, 15): + assignISAInt(i, i) + + self.workload.intel_mp_table.base_entries = base_entries + self.workload.intel_mp_table.ext_entries = ext_entries + + entries = [ + # Mark the first megabyte of memory as reserved 
+ X86E820Entry(addr=0, size="639kB", range_type=1), + X86E820Entry(addr=0x9FC00, size="385kB", range_type=2), + # Mark the rest of physical memory as available + # the local address comes first. + X86E820Entry( + addr=0x100000, + size=f"{self.mem_ranges[0].size() - 0x100000:d}B", + range_type=1, + ), + # X86E820Entry( + # addr=0x100000000, + # size=f"{self.mem_ranges[1].size()}B", + # range_type=1, + # ), + ] + # print("____", self.mem_ranges[0].size() + 0x100000) + + # Reserve the last 64kB of the 32-bit address space for m5ops + entries.append( + X86E820Entry(addr=0xFFFF0000, size="64kB", range_type=2) + ) + + print(entries) + self.workload.e820_table.entries = entries + + @overrides(AbstractBoard) + def _connect_things(self) -> None: + """Connects all the components to the board. + + The order of this board is always: + + 1. Connect the memory. + 2. Connect the cache hierarchy. + 3. Connect the processor. + + Developers may build upon this assumption when creating components. + + Notes + ----- + + * The processor is incorporated after the cache hierarchy due to a bug + noted here: https://gem5.atlassian.net/browse/GEM5-1113. Until this + bug is fixed, this ordering must be maintained. + * Once this function is called `_connect_things_called` *must* be set + to `True`. + """ + + if self._connect_things_called: + raise Exception( + "The `_connect_things` function has already been called." + ) + + # Incorporate the memory into the motherboard. + self.get_local_memory().incorporate_memory(self) + print("_", self.get_local_memory().mem_ctrl) + self.get_remote_memory().port = ( + self.get_cache_hierarchy().membus.mem_side_ports + ) + # self.get_remote_memory().incorporate_memory(self) + + # Incorporate the cache hierarchy for the motherboard. + if self.get_cache_hierarchy(): + self.get_cache_hierarchy().incorporate_cache(self) + + # Incorporate the processor into the motherboard. 
+ self.get_processor().incorporate_processor(self) + + self._connect_things_called = True + + @overrides(AbstractBoard) + def _post_instantiate(self): + """Called to set up anything needed after m5.instantiate""" + self.get_processor()._post_instantiate() + if self.get_cache_hierarchy(): + self.get_cache_hierarchy()._post_instantiate() + self.get_local_memory()._post_instantiate() + # self.get_remote_memory()._post_instantiate() diff --git a/ext/sst/gem5.hh b/ext/sst/gem5.hh index f9f00beabd..655557d0cd 100644 --- a/ext/sst/gem5.hh +++ b/ext/sst/gem5.hh @@ -141,11 +141,11 @@ class gem5Component: public SST::Component {"cmd", "command to run gem5's config"} ) - SST_ELI_DOCUMENT_SUBCOMPONENT_SLOTS( - // These are the generally expected ports. - {"system_port", "Connection to gem5 system_port", "gem5.gem5Bridge"}, - {"cache_port", "Connection to gem5 CPU", "gem5.gem5Bridge"} - ) + // SST_ELI_DOCUMENT_SUBCOMPONENT_SLOTS( + // // These are the generally expected ports. + // {"system_port", "Connection to gem5 system_port", "gem5.gem5Bridge"}, + // {"cache_port", "Connection to gem5 CPU", "gem5.gem5Bridge"} + // ) }; diff --git a/ext/sst/sst/example_board.py b/ext/sst/sst/example_board.py new file mode 100644 index 0000000000..8439f39ae2 --- /dev/null +++ b/ext/sst/sst/example_board.py @@ -0,0 +1,144 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import sst +import sys +import os + +from sst import UnitAlgebra + +cache_link_latency = "1ps" + +bbl = "riscv-boot-exit-nodisk" +cpu_clock_rate = "3GHz" +# gem5 will send requests to physical addresses of range [0x80000000, inf) to +# memory. Currently, we do not subtract 0x80000000 from the request's address to +# get the "real" address, so the mem_size would always be 2GiB larger than the +# desired memory size. + +# memory_size_gem5 = "2GiB" +memory_size_sst = "6GiB" +addr_range_end = UnitAlgebra(memory_size_sst).getRoundedValue() + +l1_params = { + "access_latency_cycles" : "1", + "cache_frequency" : cpu_clock_rate, + "replacement_policy" : "lru", + "coherence_protocol" : "MESI", + "associativity" : "4", + "cache_line_size" : "64", + "cache_size" : "32MiB", + "L1" : "1", +} + +# We keep track of all the memory ports that we have. +sst_ports = { + # "system_port" : "system.system_outgoing_bridge", + # "cache_port" : "system.memory_outgoing_bridge" + "remote_memory_port" : "system.remote_memory" +} + +# We need a list of ports. 
+port_list = [] +for port in sst_ports: + port_list.append(port) + +cpu_params = { + "frequency": cpu_clock_rate, + "cmd": " ../../disaggregated_memory_setup/numa_config_sst.py", + "debug_flags": "VIO", + "ports" : " ".join(port_list) +} + +gem5_node = sst.Component("gem5_node", "gem5.gem5Component") +gem5_node.addParams(cpu_params) + +# cache_bus = sst.Component("cache_bus", "memHierarchy.Bus") +# cache_bus.addParams( { "bus_frequency" : cpu_clock_rate } ) + +# for initialization +# system_port = gem5_node.setSubComponent(port_list[0], "gem5.gem5Bridge", 0) +# tell the SubComponent the name of the corresponding SimObject +# system_port.addParams({ "response_receiver_name": sst_ports["system_port"]}) + +# SST -> gem5 +# cache_port = gem5_node.setSubComponent(port_list[1], "gem5.gem5Bridge", 0) +# cache_port.addParams({ "response_receiver_name": sst_ports["cache_port"]}) + +# L1 cache +# l1_cache = sst.Component("l1_cache", "memHierarchy.Cache") +# l1_cache.addParams(l1_params) + +remote_memory_port = gem5_node.setSubComponent("remote_memory_port", + "gem5.gem5Bridge", 0) +remote_memory_port.addParams({ + "response_receiver_name" : sst_ports["remote_memory_port"] +}) + +# Memory +memctrl = sst.Component("memory", "memHierarchy.MemController") +# `addr_range_end` should be changed accordingly to memory_size_sst +memctrl.addParams({ + "debug" : "0", + "clock" : "2.4GHz", + "request_width" : "64", + "addr_range_end" : addr_range_end, +}) +memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") +memory.addParams({ + "access_time" : "30ns", + "mem_size" : memory_size_sst +}) + +# Connections +# cpu <-> L1 +# cpu_cache_link = sst.Link("cpu_l1_cache_link") +# cpu_cache_link.connect( +# (cache_port, "port", cache_link_latency), +# (cache_bus, "high_network_0", cache_link_latency) +# ) +# system_cache_link = sst.Link("system_cache_link") +# system_cache_link.connect( +# (system_port, "port", cache_link_latency), +# (cache_bus, "high_network_1", 
cache_link_latency) +# ) +# cache_bus_cache_link = sst.Link("cache_bus_cache_link") +# cache_bus_cache_link.connect( +# (cache_bus, "low_network_0", cache_link_latency), +# (l1_cache, "high_network_0", cache_link_latency) +# ) +# L1 <-> mem +cache_mem_link = sst.Link("l1_cache_mem_link") +cache_mem_link.connect( + (remote_memory_port, "port", cache_link_latency), + # (l1_cache, "low_network_0", cache_link_latency), + (memctrl, "direct_link", cache_link_latency) +) + +# enable Statistics +stat_params = { "rate" : "0ns" } +sst.setStatisticLoadLevel(5) +sst.setStatisticOutput("sst.statOutputTXT", {"filepath" : "./sst-stats.txt"}) diff --git a/ext/sst/sst/example_board_x86.py b/ext/sst/sst/example_board_x86.py new file mode 100644 index 0000000000..5d7d3464a4 --- /dev/null +++ b/ext/sst/sst/example_board_x86.py @@ -0,0 +1,144 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import sst +import sys +import os + +from sst import UnitAlgebra + +cache_link_latency = "1ps" + +bbl = "riscv-boot-exit-nodisk" +cpu_clock_rate = "3GHz" +# gem5 will send requests to physical addresses of range [0x80000000, inf) to +# memory currently, we do not subtract 0x80000000 from the request's address to +# get the "real" address so, the mem_size would always be 2GiB larger than the +# desired memory size + +# memory_size_gem5 = "2GiB" +memory_size_sst = "6GiB" +addr_range_end = UnitAlgebra(memory_size_sst).getRoundedValue() + +l1_params = { + "access_latency_cycles" : "1", + "cache_frequency" : cpu_clock_rate, + "replacement_policy" : "lru", + "coherence_protocol" : "MESI", + "associativity" : "4", + "cache_line_size" : "64", + "cache_size" : "32MiB", + "L1" : "1", +} + +# We keep a track of all the memory ports that we have. +sst_ports = { + # "system_port" : "system.system_outgoing_bridge", + # "cache_port" : "system.memory_outgoing_bridge" + "remote_memory_port" : "system.remote_memory" +} + +# We need a list of ports. 
+port_list = [] +for port in sst_ports: + port_list.append(port) + +cpu_params = { + "frequency": cpu_clock_rate, + "cmd": " ../../disaggregated_memory_setup/numa_config_x86.py", + "debug_flags": "", + "ports" : " ".join(port_list) +} + +gem5_node = sst.Component("gem5_node", "gem5.gem5Component") +gem5_node.addParams(cpu_params) + +# cache_bus = sst.Component("cache_bus", "memHierarchy.Bus") +# cache_bus.addParams( { "bus_frequency" : cpu_clock_rate } ) + +# for initialization +# system_port = gem5_node.setSubComponent(port_list[0], "gem5.gem5Bridge", 0) +# tell the SubComponent the name of the corresponding SimObject +# system_port.addParams({ "response_receiver_name": sst_ports["system_port"]}) + +# SST -> gem5 +# cache_port = gem5_node.setSubComponent(port_list[1], "gem5.gem5Bridge", 0) +# cache_port.addParams({ "response_receiver_name": sst_ports["cache_port"]}) + +# L1 cache +# l1_cache = sst.Component("l1_cache", "memHierarchy.Cache") +# l1_cache.addParams(l1_params) + +remote_memory_port = gem5_node.setSubComponent("remote_memory_port", + "gem5.gem5Bridge", 0) +remote_memory_port.addParams({ + "response_receiver_name" : sst_ports["remote_memory_port"] +}) + +# Memory +memctrl = sst.Component("memory", "memHierarchy.MemController") +# `addr_range_end` should be changed accordingly to memory_size_sst +memctrl.addParams({ + "debug" : "0", + "clock" : "2.4GHz", + "request_width" : "64", + "addr_range_end" : addr_range_end, +}) +memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") +memory.addParams({ + "access_time" : "30ns", + "mem_size" : memory_size_sst +}) + +# Connections +# cpu <-> L1 +# cpu_cache_link = sst.Link("cpu_l1_cache_link") +# cpu_cache_link.connect( +# (cache_port, "port", cache_link_latency), +# (cache_bus, "high_network_0", cache_link_latency) +# ) +# system_cache_link = sst.Link("system_cache_link") +# system_cache_link.connect( +# (system_port, "port", cache_link_latency), +# (cache_bus, "high_network_1", cache_link_latency) 
+# ) +# cache_bus_cache_link = sst.Link("cache_bus_cache_link") +# cache_bus_cache_link.connect( +# (cache_bus, "low_network_0", cache_link_latency), +# (l1_cache, "high_network_0", cache_link_latency) +# ) +# L1 <-> mem +cache_mem_link = sst.Link("l1_cache_mem_link") +cache_mem_link.connect( + (remote_memory_port, "port", cache_link_latency), + # (l1_cache, "low_network_0", cache_link_latency), + (memctrl, "direct_link", cache_link_latency) +) + +# enable Statistics +stat_params = { "rate" : "0ns" } +sst.setStatisticLoadLevel(5) +sst.setStatisticOutput("sst.statOutputTXT", {"filepath" : "./sst-stats.txt"}) diff --git a/ext/sst/sst/example_freq.py b/ext/sst/sst/example_freq.py new file mode 100644 index 0000000000..7886e196b4 --- /dev/null +++ b/ext/sst/sst/example_freq.py @@ -0,0 +1,139 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import sst +import sys +import os + +from sst import UnitAlgebra + +cache_link_latency = "0ps" + +bbl = "riscv-boot-exit-nodisk" +cpu_clock_rate = "1GHz" +# gem5 will send requests to physical addresses of range [0x80000000, inf) to +# memory currently, we do not subtract 0x80000000 from the request's address to +# get the "real" address so, the mem_size would always be 2GiB larger than the +# desired memory size +memory_size_gem5 = "2GiB" +memory_size_sst = "4GiB" +addr_range_end = UnitAlgebra(memory_size_sst).getRoundedValue() + +l1_params = { + "access_latency_cycles" : "1", + "cache_frequency" : cpu_clock_rate, + "replacement_policy" : "lru", + "coherence_protocol" : "MESI", + "associativity" : "4", + "cache_line_size" : "64", + "cache_size" : "2 MiB", + "L1" : "1", +} + +# We keep a track of all the memory ports that we have. +sst_ports = { + # "system_port" : "system.system_outgoing_bridge", + "cache_port" : "system.memory_outgoing_bridge" +} + +# We need a list of ports. 
+port_list = [] +for port in sst_ports: + port_list.append(port) + +cpu_params = { + "frequency": cpu_clock_rate, + "cmd": " --outdir=10MHz ../../configs/example/sst/riscv_fs.py" + + f" --cpu-clock-rate {cpu_clock_rate}" + + f" --memory-size {memory_size_gem5}", + "debug_flags": "VIO", + "ports" : " ".join(port_list) +} + +gem5_node = sst.Component("gem5_node", "gem5.gem5Component") +gem5_node.addParams(cpu_params) + +cache_bus = sst.Component("cache_bus", "memHierarchy.Bus") +cache_bus.addParams( { "bus_frequency" : cpu_clock_rate } ) + +# for initialization +# system_port = gem5_node.setSubComponent(port_list[0], "gem5.gem5Bridge", 0) +# tell the SubComponent the name of the corresponding SimObject +# system_port.addParams({ "response_receiver_name": sst_ports["system_port"]}) + +# SST -> gem5 +cache_port = gem5_node.setSubComponent(port_list[0], "gem5.gem5Bridge", 0) +cache_port.addParams({ "response_receiver_name": sst_ports["cache_port"]}) + +# L1 cache +# l1_cache = sst.Component("l1_cache", "memHierarchy.Cache") +# l1_cache.addParams(l1_params) + +# Memory +memctrl = sst.Component("memory", "memHierarchy.MemController") +# `addr_range_end` should be changed accordingly to memory_size_sst +memctrl.addParams({ + "debug" : "0", + "clock" : "1GHz", + "request_width" : "64", + "addr_range_end" : addr_range_end, +}) +memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") +memory.addParams({ + "access_time" : "48.75ns", + "mem_size" : memory_size_sst +}) + +# Connections +# cpu <-> L1 +cpu_cache_link = sst.Link("cpu_l1_cache_link") +cpu_cache_link.connect( + (cache_port, "port", cache_link_latency), + (cache_bus, "high_network_0", cache_link_latency) +) +# system_cache_link = sst.Link("system_cache_link") +# system_cache_link.connect( +# (system_port, "port", cache_link_latency), +# (cache_bus, "high_network_1", cache_link_latency) +# ) +# cache_bus_cache_link = sst.Link("cache_bus_cache_link") +# cache_bus_cache_link.connect( +# (cache_bus, 
"low_network_0", cache_link_latency), +# (l1_cache, "high_network_0", cache_link_latency) +# ) +# L1 <-> mem +cache_mem_link = sst.Link("l1_cache_mem_link") +cache_mem_link.connect( + (cache_bus, "low_network_0", cache_link_latency), + (memctrl, "direct_link", cache_link_latency) +) + +# enable Statistics +stat_params = { "rate" : "0ns" } +sst.setStatisticLoadLevel(5) +sst.setStatisticOutput("sst.statOutputTXT", {"filepath" : "./sst-stats-10Mhz.txt"}) +# sst.enableAllStatisticsForComponentName("l1_cache", stat_params) +sst.enableAllStatisticsForComponentName("memory", stat_params) diff --git a/ext/sst/sst/example_nodes.py b/ext/sst/sst/example_nodes.py new file mode 100644 index 0000000000..2f14c4db93 --- /dev/null +++ b/ext/sst/sst/example_nodes.py @@ -0,0 +1,221 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This SST configuration file tests a merlin router. +import sst +import sys +import os + +from sst import UnitAlgebra + +cache_link_latency = "1ns" + +bbl = "riscv-boot-exit-nodisk" +cpu_clock_rate = "3GHz" +# gem5 will send requests to physical addresses of range [0x80000000, inf) to memory +# currently, we do not subtract 0x80000000 from the request's address to get the "real" address +# so, the mem_size would always be 2GiB larger than the desired memory size +# memory_size_gem5 = "2GiB" +# memory_size_sst = "4GiB" +# addr_range_end = UnitAlgebra(memory_size_sst).getRoundedValue() + +l1_params = { + "access_latency_cycles" : "1", + "cache_frequency" : cpu_clock_rate, + "replacement_policy" : "lru", + "coherence_protocol" : "MESI", + "associativity" : "4", + "cache_line_size" : "64", + "cache_size" : "32 MiB", + "L1" : "1", +} + +dirNicParams = { + "network_bw" : "25GB/s", + "group" : 1, +} + +def create_cache(name, params = None): + cache = sst.Component(name, "memHierarchy.Cache") + if params is None: + cache.addParams(l1_params) + else: + cache.addParams(params) + return cache + +def connect_components(link_name: str, + low_port_name: str, low_port_idx: int, + high_port_name: str, high_port_idx: int, + port = False, direct_link = False): + link = sst.Link(link_name) + low_port = "low_network_" + str(low_port_idx) + if port == True: + low_port = "port" + high_port = "high_network_" + str(high_port_idx) + if direct_link == True: + 
high_port = "direct_link" + link.connect( + (low_port_name, low_port, cache_link_latency), + (high_port_name, high_port, cache_link_latency) + ) + +# Define the number of gem5 nodes in the system. +system_nodes = 2 + +# Define the total number of SST Memory nodes +memory_nodes = 1 + +# This example uses a fixed node memory size -> 2 GiB +# TODO: Fix this in the later version of the script. +# The directory controller decides where the addresses are mapped to. +node_memory_size = "2GiB" +remote_memory_slice = "2GiB" + +# SST memory node size. Each system gets a 2 GiB slice of fixed memory. +sst_memory_size = str((memory_nodes * int(node_memory_size[0])) + 2) + "GiB" +print(sst_memory_size) + +# Add all the Gem5 nodes to this list. +gem5_nodes = [] +cache_buses = [] +directory_caches = [] +comp_dirctrls = [] +memory_ports = [] + +# Create each of these nodes and connect it to an SST memory cache +for node in range(system_nodes): + # Each of the nodes needs to have the initial parameters. We might need + # to supply the instance count to the Gem5 side. This will enable range + # adjustments to be made to the DTB File. + cmd = [ + "--outdir=m5out_{}".format(node), + "../../configs/example/sst/riscv_fs_node.py", + "--cpu-clock-rate {}".format(cpu_clock_rate), + "--memory-size {}".format(node_memory_size), + # "--local-memory-size {}".format(node_memory_size), + # "--remote-memory-size {}".format(remote_memory_slice), + "--instance {}".format(node) + ] + ports = { + "remote_memory_port" : "system.memory_outgoing_bridge" + } + port_list = [] + for port in ports: + port_list.append(port) + cpu_params = { + "frequency" : cpu_clock_rate, + "cmd" : " ".join(cmd), + "debug_flags" : "Plic,Clint", + "ports" : " ".join(port_list) + } + # Each of the Gem5 nodes has to be separately simulated. TODO: Figure out + # this part on the mpirun side.
+ gem5_nodes.append( + sst.Component("gem5_node_{}".format(node), "gem5.gem5Component") + ) + gem5_nodes[node].addParams(cpu_params) + # We need a separate cache bus for each of the nodes + cache_buses.append( + sst.Component("cache_bus_for_node_{}".format(node), "memHierarchy.Bus") + ) + cache_buses[node].addParams({"bus_frequency" : cpu_clock_rate}) + # TODO: This needs to be updated + memory_ports.append( + gem5_nodes[node].setSubComponent( + "remote_memory_port", "gem5.gem5Bridge", 0 + ) + ) + memory_ports[node].addParams({ + "response_receiver_name" : "system.memory_outgoing_bridge" + }) + directory_caches.append(create_cache("dir_cache_{}".format(node))) + directory_caches[node].addParams({"network_address" : "2" }) + # Connect the basic components. + connect_components("node_{}_mem_port_2_bus".format(node), + memory_ports[node], 0, + cache_buses[node], 0, + port = True) + connect_components("node_{}_bus_2_dir_cache".format(node), + cache_buses[node], 0, + directory_caches[node], 0) + # Create directory controllers that dictate the memory ranges for each of + # the remote memory nodes. + comp_dirctrls.append(sst.Component( + "dirctrl_for_node_{}".format(node), + "memHierarchy.DirectoryController") + ) + addr_range_start = 0x80000000 + node * 0x80000000 + addr_range_end = 0x80000000 + (node + 1) * 0x80000000 + comp_dirctrls[node].addParams({ + "coherence_protocol" : "MESI", + "network_address" : "1", + "entry_cache_size" : "16384", + "network_bw" : "25GB/s", + "addr_range_start" : addr_range_start, # 2 * (1024 ** 3), # starts at 0x80000000 for node 0 + "addr_range_end" : addr_range_end # 2 * (1024 ** 3) + 2048 * (1024 ** 2) # ends at 0x100000000 (4GiB) for node 0 + }) +# All system nodes are set up. Now create an SST memory. Keep it simplemem for +# avoiding extra simulation time. There is only one memory node on SST's side.
+# This will be updated in the future to use number of sst_memory_nodes +memory = sst.Component("memory", "memHierarchy.MemController") +memory.addParams({ + "request_width" : 64, + "coherence_protocol" : "MESI", + "access_time" : "33 ns", + "backend.mem_size" : sst_memory_size, + "clock" : "2.4GHz", + "debug" : "0", + "range_start" : 2 * (1024 ** 3), # it's behind a directory controller and it starts at 0x80000000 + }) +comp_chiprtr = sst.Component("chiprtr", "merlin.hr_router") +comp_chiprtr.setSubComponent("topology","merlin.singlerouter") +comp_chiprtr.addParams({ + "xbar_bw" : "128GB/s", + "link_bw" : "128GB/s", + "input_buf_size" : "1KB", + "num_ports" : str(system_nodes * 2), + "flit_size" : "72B", + "output_buf_size" : "1KB", + "id" : "0", + "topology" : "merlin.singlerouter" +}) +mem_bus = sst.Component("membus", "memHierarchy.Bus") +# Finally connect all the nodes together in the net +for node in range(system_nodes): + sst.Link("link_cache_net_node_{}".format(node)).connect( + (directory_caches[node], "directory", "10ns"), + (comp_chiprtr, "port" + str(node * 2 + 1), "2ns")) + sst.Link("link_dir_net_nodes_{}".format(node)).connect( + (comp_chiprtr, "port" + str(node * 2), "2ns"), + (comp_dirctrls[node], "network", "2ns")) + sst.Link("link_dir_mem_link_node_{}".format(node)).connect( + (comp_dirctrls[node], "memory", "10ns"), + (memory, "direct_link", "10ns")) +# enable Statistics +stat_params = { "rate" : "0ns" } +sst.setStatisticLoadLevel(10) +sst.setStatisticOutput("sst.statOutputTXT", {"filepath" : "./sst-router-example.txt"}) +sst.enableAllStatisticsForAllComponents() diff --git a/ext/sst/sst/example_nodes_multi_ISA.py b/ext/sst/sst/example_nodes_multi_ISA.py new file mode 100644 index 0000000000..46e04a9751 --- /dev/null +++ b/ext/sst/sst/example_nodes_multi_ISA.py @@ -0,0 +1,234 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This SST configuration file connects ARM and RISC-V gem5 nodes to one SST memory.
+import sst +import sys +import os + +from sst import UnitAlgebra + +cache_link_latency = "1ns" + +bbl = "riscv-boot-exit-nodisk" +cpu_clock_rate = "3GHz" +# gem5 will send requests to physical addresses of range [0x80000000, inf) to memory +# currently, we do not subtract 0x80000000 from the request's address to get the "real" address +# so, the mem_size would always be 2GiB larger than the desired memory size +# memory_size_gem5 = "2GiB" +# memory_size_sst = "4GiB" +# addr_range_end = UnitAlgebra(memory_size_sst).getRoundedValue() + +l1_params = { + "access_latency_cycles" : "1", + "cache_frequency" : cpu_clock_rate, + "replacement_policy" : "lru", + "coherence_protocol" : "MESI", + "associativity" : "4", + "cache_line_size" : "64", + "cache_size" : "32 MiB", + "L1" : "1", +} + +dirNicParams = { + "network_bw" : "25GB/s", + "group" : 1, +} + +def create_cache(name, params = None): + cache = sst.Component(name, "memHierarchy.Cache") + if params is None: + cache.addParams(l1_params) + else: + cache.addParams(params) + return cache + +def connect_components(link_name: str, + low_port_name: str, low_port_idx: int, + high_port_name: str, high_port_idx: int, + port = False, direct_link = False): + link = sst.Link(link_name) + low_port = "low_network_" + str(low_port_idx) + if port == True: + low_port = "port" + high_port = "high_network_" + str(high_port_idx) + if direct_link == True: + high_port = "direct_link" + link.connect( + (low_port_name, low_port, cache_link_latency), + (high_port_name, high_port, cache_link_latency) + ) + +# =========================================================================== # + +# Define the number of gem5 nodes in the system. +system_nodes = 2 + +# Define the total number of SST Memory nodes +memory_nodes = 1 + +# This example uses fixed number of node size -> 2 GiB +# TODO: Fix this in the later version of the script. +# The directory controller decides where the addresses are mapped to. 
+node_memory_slice = "2GiB" +remote_memory_slice = "2GiB" + +# SST memory node size. Each system gets a 2 GiB slice of fixed memory. +sst_memory_size = str((memory_nodes * int(node_memory_slice[0])) + (system_nodes) * 2 + 2) + "GiB" +addr_range_end = UnitAlgebra(sst_memory_size).getRoundedValue() +print(sst_memory_size) + +# There is one cache bus connecting all gem5 ports to the remote memory. +mem_bus = sst.Component("membus", "memHierarchy.Bus") +mem_bus.addParams( { "bus_frequency" : cpu_clock_rate } ) + +memctrl = sst.Component("memory", "memHierarchy.MemController") +memctrl.setRank(0, 0) +# `addr_range_end` must be kept consistent with sst_memory_size +memctrl.addParams({ + "debug" : "0", + "clock" : "2.4GHz", + "request_width" : "64", + "addr_range_end" : addr_range_end, +}) +memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") +memory.addParams({ + "access_time" : "30ns", + "mem_size" : sst_memory_size +}) + +# Add all the Gem5 nodes to this list. +gem5_nodes = [] +memory_ports = [] + +# Create each of these nodes and connect it to an SST memory cache +for node in range(system_nodes): + # Each of the nodes needs to have the initial parameters. We might need + # to supply the instance count to the Gem5 side. This will enable range + # adjustments to be made to the DTB File.
+ cmd = [] + ports = {} + if node % 2 == 1: + # This is a RISCV node + cmd = [ + f"--outdir=m5out_riscv_node_{node}", + "../../disaggregated_memory_setup/numa_config_sst_nodes.py", + f"--cpu-clock-rate {cpu_clock_rate}", + f"--instance {node}" + + # "--outdir=m5out_{}".format(node), + # "../../configs/example/sst/riscv_fs_node.py", + # "--cpu-clock-rate {}".format(cpu_clock_rate), + # "--memory-size {}".format(node_memory_slice), + # # "--local-memory-size {}".format(node_memory_slice), + # # "--remote-memory-size {}".format(remote_memory_slice), + # "--instance {}".format(node) + ] + ports = { + "remote_memory_port" : "system.remote_memory" + } + # Each of the Gem5 node has to be separately simulated. TODO: Figure out + # this part on the mpirun side. + gem5_nodes.append( + sst.Component("gem5_node_{}".format(node), "gem5.gem5Component") + ) + memory_ports.append( + gem5_nodes[node].setSubComponent( + "remote_memory_port", "gem5.gem5Bridge", 0 + ) + ) + else: + cmd = [ + f"--outdir=m5out_arm_node_{node}", + "../../disaggregated_memory_setup/numa_config_sst_nodes_arm.py", + f"--cpu-clock-rate {cpu_clock_rate}", + f"--cpu-type timing", + f"--local-memory-range 2GiB", + f"--remote-memory-range 4294967296,6442450944", + # f"--instance {node}" + ] + ports = { + "remote_memory_port" : "system.remote_memory_outgoing_bridge" + } + # Each of the Gem5 node has to be separately simulated. TODO: Figure out + # this part on the mpirun side. 
+ gem5_nodes.append( + sst.Component("gem5_node_{}".format(node), "gem5.gem5Component") + ) + memory_ports.append( + gem5_nodes[node].setSubComponent( + "remote_memory_port", "gem5.gem5Bridge", 0 + ) + ) + # This is an ARM core + + port_list = [] + for port in ports: + port_list.append(port) + cpu_params = { + "frequency" : cpu_clock_rate, + "cmd" : " ".join(cmd), + "debug_flags" : "Plic,Clint,VIO", + "ports" : " ".join(port_list) + } + + gem5_nodes[node].addParams(cpu_params) + gem5_nodes[node].setRank(node + 1, 0) + + memory_ports[node].addParams({ + "response_receiver_name" : ports["remote_memory_port"] + }) + + # We don't need directory controllers in this example case. The start and + # end ranges do not really matter as the OS is doing this management + # in this case. + connect_components(f"node_{node}_mem_port_2_mem_bus", + memory_ports[node], 0, + mem_bus, node, + port = True) + + # directory_caches.append(create_cache("dir_cache_{}".format(node))) + # directory_caches[node].addParams({"network_address" : "2" }) + # Connect the basic components. + # connect_components("node_{}_mem_port_2_bus".format(node), + # memory_ports[node], 0, + # cache_buses[node], node, + # port = True) + +# All system nodes are set up. Now create an SST memory. Keep it simplemem for +# avoiding extra simulation time. There is only one memory node on SST's side.
+# This will be updated in the future to use number of sst_memory_nodes + +connect_components("membus_2_memory", + mem_bus, 0, + memctrl, 0, + direct_link = True) + +# enable Statistics +stat_params = { "rate" : "0ns" } +sst.setStatisticLoadLevel(10) +sst.setStatisticOutput("sst.statOutputTXT", {"filepath" : "./sst-boards-example.txt"}) +sst.enableAllStatisticsForAllComponents() diff --git a/ext/sst/sst/example_nodes_w_board.py b/ext/sst/sst/example_nodes_w_board.py new file mode 100644 index 0000000000..40eaae9656 --- /dev/null +++ b/ext/sst/sst/example_nodes_w_board.py @@ -0,0 +1,206 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This SST configuration file tests a merlin router. +import sst +import sys +import os + +from sst import UnitAlgebra + +cache_link_latency = "1ns" + +bbl = "riscv-boot-exit-nodisk" +cpu_clock_rate = "3GHz" +# gem5 will send requests to physical addresses of range [0x80000000, inf) to memory +# currently, we do not subtract 0x80000000 from the request's address to get the "real" address +# so, the mem_size would always be 2GiB larger than the desired memory size +# memory_size_gem5 = "2GiB" +# memory_size_sst = "4GiB" +# addr_range_end = UnitAlgebra(memory_size_sst).getRoundedValue() + +l1_params = { + "access_latency_cycles" : "1", + "cache_frequency" : cpu_clock_rate, + "replacement_policy" : "lru", + "coherence_protocol" : "MESI", + "associativity" : "4", + "cache_line_size" : "64", + "cache_size" : "32 MiB", + "L1" : "1", +} + +dirNicParams = { + "network_bw" : "25GB/s", + "group" : 1, +} + +def create_cache(name, params = None): + cache = sst.Component(name, "memHierarchy.Cache") + if params is None: + cache.addParams(l1_params) + else: + cache.addParams(params) + return cache + +def connect_components(link_name: str, + low_port_name: str, low_port_idx: int, + high_port_name: str, high_port_idx: int, + port = False, direct_link = False): + link = sst.Link(link_name) + low_port = "low_network_" + str(low_port_idx) + if port == True: + low_port = "port" + high_port = "high_network_" + str(high_port_idx) + if direct_link == True: + 
high_port = "direct_link" + link.connect( + (low_port_name, low_port, cache_link_latency), + (high_port_name, high_port, cache_link_latency) + ) + +# =========================================================================== # + +# Define the number of gem5 nodes in the system. +system_nodes = 4 + +# Define the total number of SST Memory nodes +memory_nodes = 1 + +# This example uses a fixed node memory size -> 2 GiB +# TODO: Fix this in the later version of the script. +# The directory controller decides where the addresses are mapped to. +node_memory_slice = "2GiB" +remote_memory_slice = "2GiB" + +# SST memory node size. Each system gets a 2 GiB slice of fixed memory. +sst_memory_size = str((memory_nodes * int(node_memory_slice[0])) + (system_nodes) * 2 + 2) + "GiB" +addr_range_end = UnitAlgebra(sst_memory_size).getRoundedValue() +print(sst_memory_size) + +# There is one cache bus connecting all gem5 ports to the remote memory. +mem_bus = sst.Component("membus", "memHierarchy.Bus") +mem_bus.addParams( { "bus_frequency" : cpu_clock_rate } ) + +memctrl = sst.Component("memory", "memHierarchy.MemController") +memctrl.setRank(0, 0) +# `addr_range_end` must be kept consistent with sst_memory_size +memctrl.addParams({ + "debug" : "0", + "clock" : "2.4GHz", + "request_width" : "64", + "addr_range_end" : addr_range_end, +}) +memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") +memory.addParams({ + "access_time" : "50ns", + "mem_size" : sst_memory_size +}) + +# Add all the Gem5 nodes to this list. +gem5_nodes = [] +memory_ports = [] + +# Create each of these nodes and connect it to an SST memory cache +for node in range(system_nodes): + # Each of the nodes needs to have the initial parameters. We might need + # to supply the instance count to the Gem5 side. This will enable range + # adjustments to be made to the DTB File.
+ cmd = [ + f"--outdir=m5out_node_ARM_O3_{node}", + # f"--outdir=switch_{node}", + # "../../disaggregated_memory_setup/numa_config_sst_nodes.py", + "../../disaggregated_memory_setup/numa_config_sst_nodes_arm.py", + f"--cpu-clock-rate {cpu_clock_rate}", + f"--instance {node}" + + # "--outdir=m5out_{}".format(node), + # "../../configs/example/sst/riscv_fs_node.py", + # "--cpu-clock-rate {}".format(cpu_clock_rate), + # "--memory-size {}".format(node_memory_slice), + # # "--local-memory-size {}".format(node_memory_slice), + # # "--remote-memory-size {}".format(remote_memory_slice), + # "--instance {}".format(node) + ] + ports = { + "remote_memory_port" : "system.remote_memory" + } + port_list = [] + for port in ports: + port_list.append(port) + cpu_params = { + "frequency" : cpu_clock_rate, + "cmd" : " ".join(cmd), + "debug_flags" : "", + "ports" : " ".join(port_list) + } + # Each of the Gem5 nodes has to be separately simulated. TODO: Figure out + # this part on the mpirun side. + gem5_nodes.append( + sst.Component("gem5_node_{}".format(node), "gem5.gem5Component") + ) + gem5_nodes[node].addParams(cpu_params) + gem5_nodes[node].setRank(node + 1, 0) + + memory_ports.append( + gem5_nodes[node].setSubComponent( + "remote_memory_port", "gem5.gem5Bridge", 0 + ) + ) + memory_ports[node].addParams({ + "response_receiver_name" : ports["remote_memory_port"] + }) + + # We don't need directory controllers in this example case. The start and + # end ranges do not really matter as the OS is doing this management + # in this case. + connect_components(f"node_{node}_mem_port_2_mem_bus", + memory_ports[node], 0, + mem_bus, node, + port = True) + + # directory_caches.append(create_cache("dir_cache_{}".format(node))) + # directory_caches[node].addParams({"network_address" : "2" }) + # Connect the basic components. + # connect_components("node_{}_mem_port_2_bus".format(node), + # memory_ports[node], 0, + # cache_buses[node], node, + # port = True) + +# All system nodes are set up.
Now create a SST memory. Keep it simplemem for +# avoiding extra simulation time. There is only one memory node in SST's side. +# This will be updated in the future to use number of sst_memory_nodes + +connect_components("membus_2_memory", + mem_bus, 0, + memctrl, 0, + direct_link = True) + +# enable Statistics +stat_params = { "rate" : "0ns" } +sst.setStatisticLoadLevel(10) +sst.setStatisticOutput("sst.statOutputTXT", {"filepath" : "./sst-boards-example-O3.txt"}) +sst.enableAllStatisticsForAllComponents() diff --git a/ext/sst/sst/example_nodes_w_board_arm.py b/ext/sst/sst/example_nodes_w_board_arm.py new file mode 100644 index 0000000000..0096a190d1 --- /dev/null +++ b/ext/sst/sst/example_nodes_w_board_arm.py @@ -0,0 +1,201 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This SST configuration file tests a merlin router. +import sst +import sys +import os + +from sst import UnitAlgebra + +cache_link_latency = "1ns" + +bbl = "riscv-boot-exit-nodisk" +cpu_clock_rate = "3GHz" +# gem5 will send requests to physical addresses of range [0x80000000, inf) to memory +# currently, we do not subtract 0x80000000 from the request's address to get the "real" address +# so, the mem_size would always be 2GiB larger than the desired memory size +# memory_size_gem5 = "2GiB" +# memory_size_sst = "4GiB" +# addr_range_end = UnitAlgebra(memory_size_sst).getRoundedValue() + +l1_params = { + "access_latency_cycles" : "1", + "cache_frequency" : cpu_clock_rate, + "replacement_policy" : "lru", + "coherence_protocol" : "MESI", + "associativity" : "4", + "cache_line_size" : "64", + "cache_size" : "32 MiB", + "L1" : "1", +} + +dirNicParams = { + "network_bw" : "25GB/s", + "group" : 1, +} + +def create_cache(name, params = None): + cache = sst.Component(name, "memHierarchy.Cache") + if params is None: + cache.addParams(l1_params) + else: + cache.addParams(params) + return cache + +def connect_components(link_name: str, + low_port_name: str, low_port_idx: int, + high_port_name: str, high_port_idx: int, + port = False, direct_link = False): + link = sst.Link(link_name) + low_port = "low_network_" + str(low_port_idx) + if port == True: + low_port = "port" + high_port = "high_network_" + str(high_port_idx) + if direct_link == True: + 
high_port = "direct_link" + link.connect( + (low_port_name, low_port, cache_link_latency), + (high_port_name, high_port, cache_link_latency) + ) + +# =========================================================================== # + +# Define the number of gem5 nodes in the system. +system_nodes = 2 + +# Define the total number of SST Memory nodes +memory_nodes = 1 + +# This example uses a fixed node memory size -> 2 GiB +# TODO: Fix this in the later version of the script. +# The directory controller decides where the addresses are mapped to. +node_memory_slice = "2GiB" +remote_memory_slice = "2GiB" + +# SST memory node size. Each system gets a 2 GiB slice of fixed memory. +sst_memory_size = str((memory_nodes * int(node_memory_slice[0])) + (system_nodes) * 2 + 2) + "GiB" +addr_range_end = UnitAlgebra(sst_memory_size).getRoundedValue() +print(sst_memory_size) + +# There is one cache bus connecting all gem5 ports to the remote memory. +mem_bus = sst.Component("membus", "memHierarchy.Bus") +mem_bus.addParams( { "bus_frequency" : cpu_clock_rate } ) + +memctrl = sst.Component("memory", "memHierarchy.MemController") +memctrl.setRank(0, 0) +# `addr_range_end` must be kept consistent with sst_memory_size +memctrl.addParams({ + "debug" : "0", + "clock" : "2.4GHz", + "request_width" : "64", + "addr_range_end" : addr_range_end, +}) +memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") +memory.addParams({ + "access_time" : "50ns", + "mem_size" : sst_memory_size +}) + +# Add all the Gem5 nodes to this list. +gem5_nodes = [] +memory_ports = [] + +# Create each of these nodes and connect it to an SST memory cache +for node in range(system_nodes): + # Each of the nodes needs to have the initial parameters. We might need + # to supply the instance count to the Gem5 side. This will enable range + # adjustments to be made to the DTB File.
+ node_range = [0x80000000 + (node + 1) * 0x80000000, 0x80000000 + (node + 2) * 0x80000000] + print(node_range) + cmd = [ + f"--outdir=m5out_arm_node_{node}", + "../../disaggregated_memory_setup/numa_config_sst_nodes_arm.py", + f"--cpu-clock-rate {cpu_clock_rate}", + f"--cpu-type o3", + f"--local-memory-range 2GiB", + f"--remote-memory-range {node_range[0]},{node_range[1]}", + # f"--instance {node}" + ] + ports = { + "remote_memory_port" : "system.remote_memory" + } + port_list = [] + for port in ports: + port_list.append(port) + cpu_params = { + "frequency" : cpu_clock_rate, + "cmd" : " ".join(cmd), + "debug_flags" : "", + "ports" : " ".join(port_list) + } + # Each of the Gem5 nodes has to be separately simulated. TODO: Figure out + # this part on the mpirun side. + gem5_nodes.append( + sst.Component("gem5_node_{}".format(node), "gem5.gem5Component") + ) + gem5_nodes[node].addParams(cpu_params) + gem5_nodes[node].setRank(node + 1, 0) + + memory_ports.append( + gem5_nodes[node].setSubComponent( + "remote_memory_port", "gem5.gem5Bridge", 0 + ) + ) + memory_ports[node].addParams({ + "response_receiver_name" : ports["remote_memory_port"] + }) + + # We don't need directory controllers in this example case. The start and + # end ranges do not really matter as the OS is doing this management + # in this case. + connect_components(f"node_{node}_mem_port_2_mem_bus", + memory_ports[node], 0, + mem_bus, node, + port = True) + + # directory_caches.append(create_cache("dir_cache_{}".format(node))) + # directory_caches[node].addParams({"network_address" : "2" }) + # Connect the basic components. + # connect_components("node_{}_mem_port_2_bus".format(node), + # memory_ports[node], 0, + # cache_buses[node], node, + # port = True) + +# All system nodes are set up. Now create an SST memory. Keep it simplemem for +# avoiding extra simulation time. There is only one memory node on SST's side.
+# This will be updated in the future to use number of sst_memory_nodes + +connect_components("membus_2_memory", + mem_bus, 0, + memctrl, 0, + direct_link = True) + +# enable Statistics +stat_params = { "rate" : "0ns" } +sst.setStatisticLoadLevel(10) +sst.setStatisticOutput("sst.statOutputTXT", {"filepath" : "./sst-boards-arm-example-O3.txt"}) +sst.enableAllStatisticsForAllComponents() diff --git a/ext/sst/sst/example_nodes_w_board_full_mem.py b/ext/sst/sst/example_nodes_w_board_full_mem.py new file mode 100644 index 0000000000..00e492f8c2 --- /dev/null +++ b/ext/sst/sst/example_nodes_w_board_full_mem.py @@ -0,0 +1,204 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This SST configuration file tests a merlin router. +import sst +import sys +import os + +from sst import UnitAlgebra + +cache_link_latency = "1ns" + +bbl = "riscv-boot-exit-nodisk" +cpu_clock_rate = "3GHz" +# gem5 will send requests to physical addresses of range [0x80000000, inf) to memory +# currently, we do not subtract 0x80000000 from the request's address to get the "real" address +# so, the mem_size would always be 2GiB larger than the desired memory size +# memory_size_gem5 = "2GiB" +# memory_size_sst = "4GiB" +# addr_range_end = UnitAlgebra(memory_size_sst).getRoundedValue() + +l1_params = { + "access_latency_cycles" : "1", + "cache_frequency" : cpu_clock_rate, + "replacement_policy" : "lru", + "coherence_protocol" : "MESI", + "associativity" : "4", + "cache_line_size" : "64", + "cache_size" : "32 MiB", + "L1" : "1", +} + +dirNicParams = { + "network_bw" : "25GB/s", + "group" : 1, +} + +def create_cache(name, params = None): + cache = sst.Component(name, "memHierarchy.Cache") + if params is None: + cache.addParams(l1_params) + else: + cache.addParams(params) + return cache + +def connect_components(link_name: str, + low_port_name: str, low_port_idx: int, + high_port_name: str, high_port_idx: int, + port = False, direct_link = False): + link = sst.Link(link_name) + low_port = "low_network_" + str(low_port_idx) + if port == True: + low_port = "port" + high_port = "high_network_" + str(high_port_idx) + if direct_link == True: + 
high_port = "direct_link" + link.connect( + (low_port_name, low_port, cache_link_latency), + (high_port_name, high_port, cache_link_latency) + ) + +# =========================================================================== # + +# Define the number of gem5 nodes in the system. +system_nodes = 2 + +# Define the total number of SST Memory nodes +memory_nodes = 1 + +# This example uses fixed number of node size -> 2 GiB +# TODO: Fix this in the later version of the script. +# The directory controller decides where the addresses are mapped to. +node_memory_slice = "2GiB" +remote_memory_slice = "2GiB" + +# SST memory node size. Each system gets a 2 GiB slice of fixed memory. +sst_memory_size = str((memory_nodes * int(node_memory_slice[0])) + (system_nodes) * 2 + 2) + "GiB" +addr_range_end = UnitAlgebra(sst_memory_size).getRoundedValue() +print(sst_memory_size) + +# There is one cache bus connecting all gem5 ports to the remote memory. +mem_bus = sst.Component("membus", "memHierarchy.Bus") +mem_bus.addParams( { "bus_frequency" : cpu_clock_rate } ) + +memctrl = sst.Component("memory", "memHierarchy.MemController") +memctrl.setRank(0, 0) +# `addr_range_end` should be changed accordingly to memory_size_sst +memctrl.addParams({ + "debug" : "0", + "clock" : "2.4GHz", + "request_width" : "64", + "addr_range_end" : addr_range_end, +}) +memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") +memory.addParams({ + "access_time" : "50ns", + "mem_size" : sst_memory_size +}) + +# Add all the Gem5 nodes to this list. +gem5_nodes = [] +memory_ports = [] + +# Create each of these nodes and conect it to a SST memory cache +for node in range(system_nodes): + # Each of the nodes needs to have the initial parameters. We might need to + # to supply the instance count to the Gem5 side. This will enable range + # adjustments to be made to the DTB File. 
+ cmd = [ + f"--outdir=m5out_node_O3x_{node}", + "../../disaggregated_memory_setup/numa_config_sst_nodes.py", + f"--cpu-clock-rate {cpu_clock_rate}", + f"--instance {node}" + + # "--outdir=m5out_{}".format(node), + # "../../configs/example/sst/riscv_fs_node.py", + # "--cpu-clock-rate {}".format(cpu_clock_rate), + # "--memory-size {}".format(node_memory_slice), + # # "--local-memory-size {}".format(node_memory_slice), + # # "--remote-memory-size {}".format(remote_memory_slice), + # "--instance {}".format(node) + ] + ports = { + "remote_memory_port" : "system.remote_memory" + } + port_list = [] + for port in ports: + port_list.append(port) + cpu_params = { + "frequency" : cpu_clock_rate, + "cmd" : " ".join(cmd), + "debug_flags" : "", + "ports" : " ".join(port_list) + } + # Each of the Gem5 node has to be separately simulated. TODO: Figure out + # this part on the mpirun side. + gem5_nodes.append( + sst.Component("gem5_node_{}".format(node), "gem5.gem5Component") + ) + gem5_nodes[node].addParams(cpu_params) + gem5_nodes[node].setRank(node + 1, 0) + + memory_ports.append( + gem5_nodes[node].setSubComponent( + "remote_memory_port", "gem5.gem5Bridge", 0 + ) + ) + memory_ports[node].addParams({ + "response_receiver_name" : ports["remote_memory_port"] + }) + + # we dont need directory controllers in this example case. The start and + # end ranges does not really matter as the OS is doing this management in + # in this case. + connect_components(f"node_{node}_mem_port_2_mem_bus", + memory_ports[node], 0, + mem_bus, node, + port = True) + + # directory_caches.append(create_cache("dir_cache_{}".format(node))) + # directory_caches[node].addParams({"network_address" : "2" }) + # Connect the basic components. + # connect_components("node_{}_mem_port_2_bus".format(node), + # memory_ports[node], 0, + # cache_buses[node], node, + # port = True) + +# All system nodes are setup. Now create a SST memory. Keep it simplemem for +# avoiding extra simulation time. 
There is only one memory node in SST's side. +# This will be updated in the future to use number of sst_memory_nodes + +connect_components("membus_2_memory", + mem_bus, 0, + memctrl, 0, + direct_link = True) + +# enable Statistics +stat_params = { "rate" : "0ns" } +sst.setStatisticLoadLevel(10) +sst.setStatisticOutput("sst.statOutputTXT", {"filepath" : "./sst-boards-example.txt"}) +sst.enableAllStatisticsForAllComponents() diff --git a/ext/sst/sst/example_traffic_gen.py b/ext/sst/sst/example_traffic_gen.py new file mode 100644 index 0000000000..0ad3e10a3a --- /dev/null +++ b/ext/sst/sst/example_traffic_gen.py @@ -0,0 +1,218 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This SST configuration file tests a merlin router. +import sst +import sys +import os + +from sst import UnitAlgebra + +cache_link_latency = "1ns" + +bbl = "riscv-boot-exit-nodisk" +cpu_clock_rate = "1GHz" +# gem5 will send requests to physical addresses of range [0x80000000, inf) to memory +# currently, we do not subtract 0x80000000 from the request's address to get the "real" address +# so, the mem_size would always be 2GiB larger than the desired memory size +# memory_size_gem5 = "2GiB" +# memory_size_sst = "4GiB" +# addr_range_end = UnitAlgebra(memory_size_sst).getRoundedValue() + +l1_params = { + "access_latency_cycles" : "1", + "cache_frequency" : cpu_clock_rate, + "replacement_policy" : "lru", + "coherence_protocol" : "MESI", + "associativity" : "4", + "cache_line_size" : "64", + "cache_size" : "32 MiB", + "L1" : "1", +} + +dirNicParams = { + "network_bw" : "25GB/s", + "group" : 1, +} + +def create_cache(name, params = None): + cache = sst.Component(name, "memHierarchy.Cache") + if params is None: + cache.addParams(l1_params) + else: + cache.addParams(params) + return cache + +def connect_components(link_name: str, + low_port_name: str, low_port_idx: int, + high_port_name: str, high_port_idx: int, + port = False, direct_link = False): + link = sst.Link(link_name) + low_port = "low_network_" + str(low_port_idx) + if port == True: + low_port = "port" + high_port = "high_network_" + str(high_port_idx) + if direct_link == True: + 
high_port = "direct_link" + link.connect( + (low_port_name, low_port, cache_link_latency), + (high_port_name, high_port, cache_link_latency) + ) + +# =========================================================================== # + +# Define the number of gem5 nodes in the system. +system_nodes = 1 + +# Define the total number of SST Memory nodes +memory_nodes = 1 + +# This example uses fixed number of node size -> 2 GiB +# TODO: Fix this in the later version of the script. +# The directory controller decides where the addresses are mapped to. +node_memory_slice = "2GiB" +remote_memory_slice = "2GiB" + +# SST memory node size. Each system gets a 2 GiB slice of fixed memory. +sst_memory_size = str((memory_nodes * int(node_memory_slice[0])) + (system_nodes) * 2 + 2) + "GiB" +addr_range_end = UnitAlgebra(sst_memory_size).getRoundedValue() +print(sst_memory_size) + +# There is one cache bus connecting all gem5 ports to the remote memory. +mem_bus = sst.Component("membus", "memHierarchy.Bus") +mem_bus.addParams( { "bus_frequency" : cpu_clock_rate } ) + +memctrl = sst.Component("memory", "memHierarchy.MemController") +memctrl.setRank(0, 0) +# `addr_range_end` should be changed accordingly to memory_size_sst +memctrl.addParams({ + "debug" : "1", + "clock" : "1GHz", + "request_width" : "64", + "verbose" : 2, + "debug_level" : 10, + "backing" : "none", + "addr_range_end" : addr_range_end, +}) +# memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") +# memory.addParams({ +# "access_time" : "50ns", +# "mem_size" : sst_memory_size +# }) + +memory = memctrl.setSubComponent( "backend", "memHierarchy.timingDRAM") +memory.addParams({ + "id" : 0, + "addrMapper" : "memHierarchy.sandyBridgeAddrMapper", + "addrMapper.interleave_size" : "64B", + "addrMapper.row_size" : "1KiB", + "clock" : "2.4GHz", + "mem_size" : sst_memory_size, + "channels" : 1, + "channel.numRanks" : 2, + "channel.rank.numBanks" : 16, + "channel.transaction_Q_size" : 64, + "channel.rank.bank.CL" : 14, 
+ "channel.rank.bank.CL_WR" : 12, + "channel.rank.bank.RCD" : 14, + "channel.rank.bank.TRP" : 14, + "channel.rank.bank.dataCycles" : 2, + "channel.rank.bank.pagePolicy" : "memHierarchy.timeoutPagePolicy", + "channel.rank.bank.transactionQ" : "memHierarchy.reorderTransactionQ", + "channel.rank.bank.pagePolicy.timeoutCycles" : 50, + "printconfig" : 0, + "channel.printconfig" : 0, + "channel.rank.printconfig" : 0, + "channel.rank.bank.printconfig" : 0, +}) + + +# Add all the Gem5 nodes to this list. +gem5_nodes = [] +memory_ports = [] + +# Create each of these nodes and conect it to a SST memory cache +for node in range(system_nodes): + # Each of the nodes needs to have the initial parameters. We might need to + # to supply the instance count to the Gem5 side. This will enable range + # adjustments to be made to the DTB File. + cmd = [ + f"--outdir=traffic_gen_{node}", + "../../configs/example/sst/traffic_gen.py", + f"--cpu-clock-rate {cpu_clock_rate}", + "--memory-size 1GiB" + ] + ports = { + "remote_memory_port" : "system.memory_outgoing_bridge" + } + port_list = [] + for port in ports: + port_list.append(port) + cpu_params = { + "frequency" : cpu_clock_rate, + "cmd" : " ".join(cmd), + "debug_flags" : "", # TrafficGen", + "ports" : " ".join(port_list) + } + # Each of the Gem5 node has to be separately simulated. TODO: Figure out + # this part on the mpirun side. + gem5_nodes.append( + sst.Component("gem5_node_{}".format(node), "gem5.gem5Component") + ) + gem5_nodes[node].addParams(cpu_params) + gem5_nodes[node].setRank(node + 1, 0) + + memory_ports.append( + gem5_nodes[node].setSubComponent( + "remote_memory_port", "gem5.gem5Bridge", 0 + ) + ) + memory_ports[node].addParams({ + "response_receiver_name" : ports["remote_memory_port"] + }) + + # we dont need directory controllers in this example case. The start and + # end ranges does not really matter as the OS is doing this management in + # in this case. 
+ connect_components(f"node_{node}_mem_port_2_mem_bus", + memory_ports[node], 0, + mem_bus, node, + port = True) + +# All system nodes are setup. Now create a SST memory. Keep it simplemem for +# avoiding extra simulation time. There is only one memory node in SST's side. +# This will be updated in the future to use number of sst_memory_nodes + +connect_components("membus_2_memory", + mem_bus, 0, + memctrl, 0, + direct_link = True) + +# enable Statistics +stat_params = { "rate" : "0ns" } +sst.setStatisticLoadLevel(10) +sst.setStatisticOutput("sst.statOutputTXT", {"filepath" : "./sst-traffic-example.txt"}) +sst.enableAllStatisticsForAllComponents() diff --git a/ext/sst/sst_responder.cc b/ext/sst/sst_responder.cc index 14c64abb52..b97910d566 100644 --- a/ext/sst/sst_responder.cc +++ b/ext/sst/sst_responder.cc @@ -65,4 +65,6 @@ void SSTResponder::handleRecvFunctional(gem5::PacketPtr pkt) { owner->handleRecvFunctional(pkt); + // std::cout << "SSTResponder" << std::endl; + // make these timing? 
} diff --git a/ext/sst/sst_responder_subcomponent.cc b/ext/sst/sst_responder_subcomponent.cc index 8cd2c04628..a49092db9e 100644 --- a/ext/sst/sst_responder_subcomponent.cc +++ b/ext/sst/sst_responder_subcomponent.cc @@ -108,6 +108,9 @@ SSTResponderSubComponent::init(unsigned phase) addr, data.size(), data); memoryInterface->sendUntimedData(request); } + responseReceiver->initData.clear(); + responseReceiver->initPhaseComplete(true); + std::cout << "Init phase is complete " << responseReceiver->getInitPhaseStatus() << std::endl; } memoryInterface->init(phase); } @@ -205,6 +208,25 @@ SSTResponderSubComponent::portEventHandler( dynamic_cast(request)) { return; } + else if (SST::Interfaces::StandardMem::ReadResp* test = + dynamic_cast( + request)) { + // functional calls this + // gem5::RequestPtr req = std::make_shared( + // dynamic_cast( + // request)->pAddr, + // dynamic_cast( + // request)->size, 0, 0); + + // gem5::PacketPtr pkt = new gem5::Packet( + // req, gem5::MemCmd::ReadResp); + + // // Clear out bus delay notifications + // pkt->headerDelay = pkt->payloadDelay = 0; + // if (!(responseReceiver->sendTimingResp(pkt))) + // responseQueue.push(pkt); + return; + } else if (SST::Interfaces::StandardMem::WriteResp* test = dynamic_cast( request)) { @@ -241,6 +263,44 @@ SSTResponderSubComponent::handleRecvRespRetry() void SSTResponderSubComponent::handleRecvFunctional(gem5::PacketPtr pkt) { + // SST does not understand what is a functional access in gem5. Since it + // has all the stored in it's memory, any functional access made to SST has + // to be correctly handled. All functional access *must be* writes. + std::cout << "handleRecvFunc was called! Need to do something here!" << std::endl; + // basically this data has to be present + gem5::Addr addr = pkt->getAddr(); + uint8_t* ptr = pkt->getPtr(); + uint64_t size = pkt->getSize(); + + // Create a new request to handle this request immediately. 
+ SST::Interfaces::StandardMem::Request* request = nullptr; + + // we need a minimal translator here which does reads and writes. + switch((gem5::MemCmd::Command)pkt->cmd.toInt()) { + case gem5::MemCmd::WriteReq: { + std::vector data(ptr, ptr+size); + request = new SST::Interfaces::StandardMem::Write( + addr, data.size(), data); + break; + } + case gem5::MemCmd::ReadReq: { + request = new SST::Interfaces::StandardMem::Read(addr, size); + break; + } + default: + panic("handleRecvFunctional: Unable to convert gem5 packet: %s\n", pkt->cmd.toString()); + } + if(pkt->req->isUncacheable()) { + request->setFlag( + SST::Interfaces::StandardMem::Request::Flag::F_NONCACHEABLE); + } + memoryInterface->send(request); + // memoryInterface->sendUntimedData(request); + + // memoryInterface->init(phase); + // sst does not understand what is a functional request. queue this as a + // regular memory packet. + } bool diff --git a/ext/sst/translator.hh b/ext/sst/translator.hh index bf6a168d9a..1ecb5e4ec5 100644 --- a/ext/sst/translator.hh +++ b/ext/sst/translator.hh @@ -157,7 +157,9 @@ gem5RequestToSSTRequest(gem5::PacketPtr pkt, if (pkt->needsResponse()) sst_request_id_to_packet_map[request->getID()] = pkt; - + + // if(gem5::curTick() > 340330000000) + // std::cout << request->getString() << std::endl; return request; } @@ -165,6 +167,8 @@ inline void inplaceSSTRequestToGem5PacketPtr(gem5::PacketPtr pkt, SST::Interfaces::StandardMem::Request* request) { + // if(gem5::curTick() > 340330000000) + // std::cout << request->getString() << std::endl; pkt->makeResponse(); // Resolve the success of Store Conditionals diff --git a/src/sst/outgoing_request_bridge.cc b/src/sst/outgoing_request_bridge.cc index 58abfbad46..12e1f01bcc 100644 --- a/src/sst/outgoing_request_bridge.cc +++ b/src/sst/outgoing_request_bridge.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2021 The Regents of the University of California +// Copyright (c) 2021-2023 The Regents of the University of California // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -43,6 +43,7 @@ OutgoingRequestBridge::OutgoingRequestBridge( physicalAddressRanges(params.physical_address_ranges.begin(), params.physical_address_ranges.end()) { + this->init_phase_bool = false; } OutgoingRequestBridge::~OutgoingRequestBridge() @@ -84,6 +85,7 @@ OutgoingRequestBridge::getAddrRanges() const std::vector>> OutgoingRequestBridge::getInitData() const { + std::cout << "getInitData() called!" << std::endl; return initData; } @@ -105,19 +107,48 @@ OutgoingRequestBridge::sendTimingSnoopReq(gem5::PacketPtr pkt) outgoingPort.sendTimingSnoopReq(pkt); } +void +OutgoingRequestBridge::initPhaseComplete(bool value) { + init_phase_bool = value; +} +bool +OutgoingRequestBridge::getInitPhaseStatus() { + return init_phase_bool; +} void OutgoingRequestBridge::handleRecvFunctional(PacketPtr pkt) { - uint8_t* ptr = pkt->getPtr(); - uint64_t size = pkt->getSize(); - std::vector data(ptr, ptr+size); - initData.push_back(std::make_pair(pkt->getAddr(), data)); + // This should not receive any functional accesses + gem5::MemCmd::Command pktCmd = (gem5::MemCmd::Command)pkt->cmd.toInt(); + // std::cout << "Recv Functional : 0x" << std::hex << pkt->getAddr() << + // std::dec << " " << pktCmd << " " << gem5::MemCmd::WriteReq << " " << + // getInitPhaseStatus() << std::endl; + // Check at which stage are we at. If we are at INIT phase, then queue all + // these packets. + if (!getInitPhaseStatus()) + { + // sstResponder->recvAtomic(pkt); + uint8_t* ptr = pkt->getPtr(); + uint64_t size = pkt->getSize(); + std::vector data(ptr, ptr+size); + initData.push_back(std::make_pair(pkt->getAddr(), data)); + } + // This is the RUN phase. + else { + // These packets have to translated at runtime. We convert these + // packets to timing as its data has to be stored correctly in SST + // memory. + // + // Ensure that these packets are write requests. 
+ sstResponder->handleRecvFunctional(pkt); + } } Tick OutgoingRequestBridge:: OutgoingRequestPort::recvAtomic(PacketPtr pkt) { + // return 0; assert(false && "OutgoingRequestPort::recvAtomic not implemented"); return Tick(); } diff --git a/src/sst/outgoing_request_bridge.hh b/src/sst/outgoing_request_bridge.hh index af049af45a..0a3e2e2821 100644 --- a/src/sst/outgoing_request_bridge.hh +++ b/src/sst/outgoing_request_bridge.hh @@ -69,6 +69,18 @@ class OutgoingRequestBridge: public SimObject AddrRangeList getAddrRanges() const; }; + // We need a boolean variable to distinguish between INIT and RUN phases in + // SST. Gem5 does functional accesses to the SST memory when: + // (a) It loads the kernel (at the start of the simulation + // (b) During VIO/disk accesses. + // While loading the kernel, it is easy to handle all functional accesses + // as SST allows initializing of untimed data during its INIT phase. + // However, functional accesses done to the SST memory during RUN phase has + // to handled separately. In this implementation, we convert all such + // functional accesses to timing accesses so that it is correctly read from + // the memory. + bool init_phase_bool; + public: // a gem5 ResponsePort OutgoingRequestPort outgoingPort; @@ -97,8 +109,18 @@ class OutgoingRequestBridge: public SimObject // Returns the buffered data for initialization. This is necessary as // when gem5 sends functional requests to memory for initialization, // the connection in SST Memory Hierarchy has not been constructed yet. + // This buffer is only used during the INIT phase. std::vector>> getInitData() const; + // We need Set/Get functions to set the init_phase_bool. + // `initPhaseComplete` is used to signal the outgoing bridge that INIT + // phase is completed and RUN phase will start. + void initPhaseComplete(bool value); + + // We read the value of the init_phase_bool using `getInitPhaseStatus` + // method. 
+ + bool getInitPhaseStatus(); // gem5 Component (from SST) will call this function to let set the // bridge's corresponding SSTResponderSubComponent (which implemented // SSTResponderInterface). I.e., this will connect this bridge to the From 9f4ec7416306a11fc378442a34ce4f6b7622d43c Mon Sep 17 00:00:00 2001 From: Kaustav Goswami Date: Mon, 30 Oct 2023 19:49:41 -0700 Subject: [PATCH 06/23] ext: added functional accesses to gem5/SST bridge. This change adds functional accesses to the gem5/SST bridge to boot disk images using the bridge. The change also adds more information in INSTALL.md for MacOS users on how to get the bridge setup working. Signed-off-by: Kaustav Goswami --- ext/sst/INSTALL.md | 7 +++++++ ext/sst/Makefile | 21 --------------------- ext/sst/sst_responder.cc | 2 -- ext/sst/sst_responder_subcomponent.cc | 16 +++++++++++----- src/sst/outgoing_request_bridge.cc | 14 ++++++++------ 5 files changed, 26 insertions(+), 34 deletions(-) delete mode 100644 ext/sst/Makefile diff --git a/ext/sst/INSTALL.md b/ext/sst/INSTALL.md index 63340bef51..3d2c4940b9 100644 --- a/ext/sst/INSTALL.md +++ b/ext/sst/INSTALL.md @@ -98,6 +98,13 @@ According to the OS that you're using, you need to rename the `Makefile.xxx` to cp Makefile.xxx Makefile # linux or mac make -j4 ``` +If you are compiling this on Mac, then you'd need to export `DYLD_LIBRARY_PATH` +```sh +# go to the base gem5 directory +cd ../.. +export DYLD_LIBRARY_PATH=:`pwd`/build/RISCV/ +``` + Change `ARCH=RISCV` to `ARCH=ARM` in the `Makefile` in case you're compiling for ARM. 
### Running an example simulation diff --git a/ext/sst/Makefile b/ext/sst/Makefile deleted file mode 100644 index 9213d266e9..0000000000 --- a/ext/sst/Makefile +++ /dev/null @@ -1,21 +0,0 @@ -SST_VERSION=SST-11.1.0 # Name of the .pc file in lib/pkgconfig where SST is installed -GEM5_LIB=gem5_opt -ARCH=RISCV -OFLAG=3 - -LDFLAGS=-shared -fno-common ${shell pkg-config ${SST_VERSION} --libs} -L../../build/${ARCH}/ -Wl,-rpath ../../build/${ARCH} -CXXFLAGS=-std=c++17 -g -O${OFLAG} -fPIC ${shell pkg-config ${SST_VERSION} --cflags} ${shell python3-config --includes} -I../../build/${ARCH}/ -I../../ext/pybind11/include/ -I../../build/softfloat/ -I../../ext -CPPFLAGS+=-MMD -MP -SRC=$(wildcard *.cc) - -.PHONY: clean all - -all: libgem5.so - -libgem5.so: $(SRC:%.cc=%.o) - ${CXX} ${CPPFLAGS} ${LDFLAGS} $? -o $@ -l${GEM5_LIB} - --include $(SRC:%.cc=%.d) - -clean: - ${RM} *.[do] libgem5.so diff --git a/ext/sst/sst_responder.cc b/ext/sst/sst_responder.cc index b97910d566..14c64abb52 100644 --- a/ext/sst/sst_responder.cc +++ b/ext/sst/sst_responder.cc @@ -65,6 +65,4 @@ void SSTResponder::handleRecvFunctional(gem5::PacketPtr pkt) { owner->handleRecvFunctional(pkt); - // std::cout << "SSTResponder" << std::endl; - // make these timing? } diff --git a/ext/sst/sst_responder_subcomponent.cc b/ext/sst/sst_responder_subcomponent.cc index a49092db9e..75feca13ec 100644 --- a/ext/sst/sst_responder_subcomponent.cc +++ b/ext/sst/sst_responder_subcomponent.cc @@ -263,11 +263,13 @@ SSTResponderSubComponent::handleRecvRespRetry() void SSTResponderSubComponent::handleRecvFunctional(gem5::PacketPtr pkt) { - // SST does not understand what is a functional access in gem5. Since it + // SST does not understand what is a functional access in gem5 since SST + // only allows functional accesses at init time. Since it // has all the stored in it's memory, any functional access made to SST has - // to be correctly handled. All functional access *must be* writes. + // to be correctly handled. 
The idea here is to convert this timing access + // into a timing access and keep the SST memory consistent. std::cout << "handleRecvFunc was called! Need to do something here!" << std::endl; - // basically this data has to be present + gem5::Addr addr = pkt->getAddr(); uint8_t* ptr = pkt->getPtr(); uint64_t size = pkt->getSize(); @@ -275,7 +277,8 @@ SSTResponderSubComponent::handleRecvFunctional(gem5::PacketPtr pkt) // Create a new request to handle this request immediately. SST::Interfaces::StandardMem::Request* request = nullptr; - // we need a minimal translator here which does reads and writes. + // we need a minimal translator here which does reads and writes. Any other + // command type is unexpected and the program should crash immediately. switch((gem5::MemCmd::Command)pkt->cmd.toInt()) { case gem5::MemCmd::WriteReq: { std::vector data(ptr, ptr+size); @@ -288,7 +291,10 @@ SSTResponderSubComponent::handleRecvFunctional(gem5::PacketPtr pkt) break; } default: - panic("handleRecvFunctional: Unable to convert gem5 packet: %s\n", pkt->cmd.toString()); + panic( + "handleRecvFunctional: Unable to convert gem5 packet: %s\n", + pkt->cmd.toString() + ); } if(pkt->req->isUncacheable()) { request->setFlag( diff --git a/src/sst/outgoing_request_bridge.cc b/src/sst/outgoing_request_bridge.cc index 12e1f01bcc..74426d778c 100644 --- a/src/sst/outgoing_request_bridge.cc +++ b/src/sst/outgoing_request_bridge.cc @@ -85,7 +85,6 @@ OutgoingRequestBridge::getAddrRanges() const std::vector>> OutgoingRequestBridge::getInitData() const { - std::cout << "getInitData() called!" 
<< std::endl; return initData; } @@ -119,7 +118,7 @@ void OutgoingRequestBridge::handleRecvFunctional(PacketPtr pkt) { // This should not receive any functional accesses - gem5::MemCmd::Command pktCmd = (gem5::MemCmd::Command)pkt->cmd.toInt(); + // gem5::MemCmd::Command pktCmd = (gem5::MemCmd::Command)pkt->cmd.toInt(); // std::cout << "Recv Functional : 0x" << std::hex << pkt->getAddr() << // std::dec << " " << pktCmd << " " << gem5::MemCmd::WriteReq << " " << // getInitPhaseStatus() << std::endl; @@ -133,13 +132,16 @@ OutgoingRequestBridge::handleRecvFunctional(PacketPtr pkt) std::vector data(ptr, ptr+size); initData.push_back(std::make_pair(pkt->getAddr(), data)); } - // This is the RUN phase. + // This is the RUN phase. SST does not allow any sendUntimedData (AKA + // functional accesses) to it's memory. We need to convert these accesses + // to timing to at least store the correct data in the memory. else { // These packets have to translated at runtime. We convert these // packets to timing as its data has to be stored correctly in SST - // memory. - // - // Ensure that these packets are write requests. + // memory. Otherwise reads from the SST memory will fail. To reproduce + // this error, don not handle any functional accesses and the kernel + // boot will fail while reading the correct partition from the vda + // device. sstResponder->handleRecvFunctional(pkt); } } From 208512c9b553b1db2ea1b46d5be4063f0ea648f1 Mon Sep 17 00:00:00 2001 From: Kaustav Goswami Date: Tue, 21 Nov 2023 18:46:52 -0800 Subject: [PATCH 07/23] ext: outgoing bridge supports limited func accesses This change adds a lot of new features to the gem5-SST bridge. The following list of changes describes each of them: 1. The bridge now supports limited functional accesses, which enables users to work with vda devices in ARM/RISCV systems. 2. There are new methods added to the bridge which allow future extension of the bridge to swap ports to enable a delayed INIT phase for SST.
This will allow the bridge to start the system in gem5 and then swap the memory ports to SST at RUN phase. 3. A lot of stray debug statements/cout statements were removed in this patch. 4. New documentation was added to the SST component for gem5. Signed-off-by: Kaustav Goswami --- ext/sst/gem5.cc | 48 +-------- ext/sst/gem5.hh | 16 ++- ext/sst/sst_responder_subcomponent.cc | 142 +------------------------- ext/sst/sst_responder_subcomponent.hh | 9 -- ext/sst/translator.hh | 108 +------------------- src/sst/outgoing_request_bridge.cc | 13 +-- 6 files changed, 14 insertions(+), 322 deletions(-) diff --git a/ext/sst/gem5.cc b/ext/sst/gem5.cc index ada3faa2e5..f9357937a6 100644 --- a/ext/sst/gem5.cc +++ b/ext/sst/gem5.cc @@ -182,30 +182,6 @@ gem5Component::gem5Component(SST::ComponentId_t id, SST::Params& params): // Split the port names using the util method defined. splitPortNames(ports); for (int i = 0 ; i < sstPortCount ; i++) { - std::cout << sstPortNames[i] << std::endl; - sstPorts.push_back( - loadUserSubComponent(sstPortNames[i], 0) - ); - // If the name defined in the `ports` is incorrect, then the program - // will crash when calling `setTimeConverter`. - sstPorts[i]->setTimeConverter(timeConverter); - sstPorts[i]->setOutputStream(&(output)); - } - // We need to add another parameter when invoking gem5 scripts from SST to - // keep a track of all the OutgoingBridges. This will allow to add or - // remove OutgoingBridges from gem5 configs without the need to recompile - // the ext/sst source everytime. - std::string ports = params.find("ports", ""); - if (ports.empty()) { - output.fatal( - CALL_INFO, -1, "Component %s must have a 'ports' parameter.\n", - getName().c_str() - ); - } - // Split the port names using the util method defined. 
- splitPortNames(ports); - for (int i = 0 ; i < sstPortCount ; i++) { - std::cout << sstPortNames[i] << std::endl; sstPorts.push_back( loadUserSubComponent(sstPortNames[i], 0) ); @@ -248,9 +224,6 @@ gem5Component::init(unsigned phase) for (auto &port : sstPorts) { port->findCorrespondingSimObject(gem5_root); } - for (auto &port : sstPorts) { - port->findCorrespondingSimObject(gem5_root); - } // initialize the gem5 event queue if (!(threadInitialized)) { @@ -266,9 +239,6 @@ gem5Component::init(unsigned phase) for (auto &port : sstPorts) { port->init(phase); } - for (auto &port : sstPorts) { - port->init(phase); - } } void @@ -278,9 +248,6 @@ gem5Component::setup() for (auto &port : sstPorts) { port->setup(); } - for (auto &port : sstPorts) { - port->setup(); - } } void @@ -479,17 +446,4 @@ gem5Component::splitPortNames(std::string port_names) sstPortNames.push_back(strdup(part.c_str())); sstPortCount++; } -} - -void -gem5Component::splitPortNames(std::string port_names) -{ - std::vector parsed_args = tokenizeString( - port_names, {'\\', ' ', '\'', '\"'} - ); - sstPortCount = 0; - for (auto part: parsed_args) { - sstPortNames.push_back(strdup(part.c_str())); - sstPortCount++; - } -} +} \ No newline at end of file diff --git a/ext/sst/gem5.hh b/ext/sst/gem5.hh index cf15af0ac8..3e08686f5e 100644 --- a/ext/sst/gem5.hh +++ b/ext/sst/gem5.hh @@ -148,16 +148,12 @@ class gem5Component: public SST::Component {"cmd", "command to run gem5's config"} ) - // SST_ELI_DOCUMENT_SUBCOMPONENT_SLOTS( - // // These are the generally expected ports. - // {"system_port", "Connection to gem5 system_port", "gem5.gem5Bridge"}, - // {"cache_port", "Connection to gem5 CPU", "gem5.gem5Bridge"} - // ) - // SST_ELI_DOCUMENT_SUBCOMPONENT_SLOTS( - // // These are the generally expected ports. 
- // {"system_port", "Connection to gem5 system_port", "gem5.gem5Bridge"}, - // {"cache_port", "Connection to gem5 CPU", "gem5.gem5Bridge"} - // ) + SST_ELI_DOCUMENT_SUBCOMPONENT_SLOTS( + // These are the generally expected ports. + {"ports", + "Connection to gem5's outgoing ports to SST's ports", + "gem5.gem5Bridge"} + ) }; diff --git a/ext/sst/sst_responder_subcomponent.cc b/ext/sst/sst_responder_subcomponent.cc index 6d7ce35529..28c949690f 100644 --- a/ext/sst/sst_responder_subcomponent.cc +++ b/ext/sst/sst_responder_subcomponent.cc @@ -64,12 +64,6 @@ SSTResponderSubComponent::setTimeConverter(SST::TimeConverter* tc) // SHARE_PORTS means the interface can use our port as if it were its own // INSERT_STATS means the interface will inherit our statistic // configuration (e.g., if ours are enabled, the interface’s will be too) - memoryInterface = loadAnonymousSubComponent( - "memHierarchy.standardInterface", "memory", 0, - SST::ComponentInfo::SHARE_PORTS | SST::ComponentInfo::INSERT_STATS, - interface_params, timeConverter, - new SST::Interfaces::StandardMem::Handler( - this, &SSTResponderSubComponent::portEventHandler) memoryInterface = loadAnonymousSubComponent( "memHierarchy.standardInterface", "memory", 0, SST::ComponentInfo::SHARE_PORTS | SST::ComponentInfo::INSERT_STATS, @@ -97,9 +91,7 @@ SSTResponderSubComponent::setResponseReceiver( bool SSTResponderSubComponent::handleTimingReq( SST::Interfaces::StandardMem::Request* request) - SST::Interfaces::StandardMem::Request* request) { - memoryInterface->send(request); memoryInterface->send(request); return true; } @@ -118,15 +110,6 @@ SSTResponderSubComponent::init(unsigned phase) } responseReceiver->initData.clear(); responseReceiver->initPhaseComplete(true); - std::cout << "Init phase is complete " << responseReceiver->getInitPhaseStatus() << std::endl; - SST::Interfaces::StandardMem::Request* request = \ - new SST::Interfaces::StandardMem::Write( - addr, data.size(), data); - 
memoryInterface->sendUntimedData(request); - } - responseReceiver->initData.clear(); - responseReceiver->initPhaseComplete(true); - std::cout << "Init phase is complete " << responseReceiver->getInitPhaseStatus() << std::endl; } memoryInterface->init(phase); } @@ -149,20 +132,16 @@ SSTResponderSubComponent::findCorrespondingSimObject(gem5::Root* gem5_root) void SSTResponderSubComponent::handleSwapReqResponse( SST::Interfaces::StandardMem::Request* request) - SST::Interfaces::StandardMem::Request* request) { // get the data, then, // 1. send a response to gem5 with the original data // 2. send a write to memory with atomic op applied - SST::Interfaces::StandardMem::Request::id_t request_id = request->getID(); SST::Interfaces::StandardMem::Request::id_t request_id = request->getID(); TPacketMap::iterator it = sstRequestIdToPacketMap.find(request_id); assert(it != sstRequestIdToPacketMap.end()); std::vector data = \ dynamic_cast(request)->data; - std::vector data = \ - dynamic_cast(request)->data; // step 1 gem5::PacketPtr pkt = it->second; @@ -170,10 +149,6 @@ SSTResponderSubComponent::handleSwapReqResponse( dynamic_cast( request)->data.data() ); - pkt->setData( - dynamic_cast( - request)->data.data() - ); pkt->makeAtomicResponse(); pkt->headerDelay = pkt->payloadDelay = 0; if (blocked() || !responseReceiver->sendTimingResp(pkt)) @@ -184,11 +159,6 @@ SSTResponderSubComponent::handleSwapReqResponse( // This is a Write. Need to use the Write visitor class. But the original // request is a read response. Therefore, we need to find the address and // the data size and then call Write. - SST::Interfaces::StandardMem::Addr addr = \ - dynamic_cast(request)->pAddr; - // This is a Write. Need to use the Write visitor class. But the original - // request is a read response. Therefore, we need to find the address and - // the data size and then call Write. 
SST::Interfaces::StandardMem::Addr addr = \ dynamic_cast(request)->pAddr; auto data_size = data.size(); @@ -199,13 +169,6 @@ SSTResponderSubComponent::handleSwapReqResponse( // visitor classes. This has to be addressed in the future. The boot test // works without using ReadLock and WriteUnlock classes. memoryInterface->send(write_request); - // Create the Write request here. - SST::Interfaces::StandardMem::Request* write_request = \ - new SST::Interfaces::StandardMem::Write(addr, data_size, data); - // F_LOCKED flag in SimpleMem was changed to ReadLock and WriteUnlock - // visitor classes. This has to be addressed in the future. The boot test - // works without using ReadLock and WriteUnlock classes. - memoryInterface->send(write_request); delete request; } @@ -213,11 +176,9 @@ SSTResponderSubComponent::handleSwapReqResponse( void SSTResponderSubComponent::portEventHandler( SST::Interfaces::StandardMem::Request* request) - SST::Interfaces::StandardMem::Request* request) { // Expect to handle an SST response SST::Interfaces::StandardMem::Request::id_t request_id = request->getID(); - SST::Interfaces::StandardMem::Request::id_t request_id = request->getID(); TPacketMap::iterator it = sstRequestIdToPacketMap.find(request_id); @@ -237,17 +198,11 @@ SSTResponderSubComponent::portEventHandler( Translator::inplaceSSTRequestToGem5PacketPtr(pkt, request); - if (blocked() || !(responseReceiver->sendTimingResp(pkt))) { if (blocked() || !(responseReceiver->sendTimingResp(pkt))) { responseQueue.push(pkt); } } else { - // we can handle unexpected invalidates, but nothing else. - if (SST::Interfaces::StandardMem::Read* test = - dynamic_cast(request)) { - } - } else { - // we can handle unexpected invalidates, but nothing else. + // we can handle a few types of requests. 
if (SST::Interfaces::StandardMem::Read* test = dynamic_cast(request)) { return; @@ -255,46 +210,6 @@ SSTResponderSubComponent::portEventHandler( else if (SST::Interfaces::StandardMem::ReadResp* test = dynamic_cast( request)) { - // functional calls this - // gem5::RequestPtr req = std::make_shared( - // dynamic_cast( - // request)->pAddr, - // dynamic_cast( - // request)->size, 0, 0); - - // gem5::PacketPtr pkt = new gem5::Packet( - // req, gem5::MemCmd::ReadResp); - - // // Clear out bus delay notifications - // pkt->headerDelay = pkt->payloadDelay = 0; - // if (!(responseReceiver->sendTimingResp(pkt))) - // responseQueue.push(pkt); - return; - } - else if (SST::Interfaces::StandardMem::WriteResp* test = - dynamic_cast( - request)) { - return; - } - // for Snoop/no response needed - } - else if (SST::Interfaces::StandardMem::ReadResp* test = - dynamic_cast( - request)) { - // functional calls this - // gem5::RequestPtr req = std::make_shared( - // dynamic_cast( - // request)->pAddr, - // dynamic_cast( - // request)->size, 0, 0); - - // gem5::PacketPtr pkt = new gem5::Packet( - // req, gem5::MemCmd::ReadResp); - - // // Clear out bus delay notifications - // pkt->headerDelay = pkt->payloadDelay = 0; - // if (!(responseReceiver->sendTimingResp(pkt))) - // responseQueue.push(pkt); return; } else if (SST::Interfaces::StandardMem::WriteResp* test = @@ -309,10 +224,6 @@ SSTResponderSubComponent::portEventHandler( request)->pAddr, dynamic_cast( request)->size, 0, 0); - dynamic_cast( - request)->pAddr, - dynamic_cast( - request)->size, 0, 0); gem5::PacketPtr pkt = new gem5::Packet( req, gem5::MemCmd::InvalidateReq); @@ -342,7 +253,6 @@ SSTResponderSubComponent::handleRecvFunctional(gem5::PacketPtr pkt) // has all the stored in it's memory, any functional access made to SST has // to be correctly handled. The idea here is to convert this timing access // into a timing access and keep the SST memory consistent. - std::cout << "handleRecvFunc was called! 
Need to do something here!" << std::endl; gem5::Addr addr = pkt->getAddr(); uint8_t* ptr = pkt->getPtr(); @@ -375,56 +285,6 @@ SSTResponderSubComponent::handleRecvFunctional(gem5::PacketPtr pkt) SST::Interfaces::StandardMem::Request::Flag::F_NONCACHEABLE); } memoryInterface->send(request); - // memoryInterface->sendUntimedData(request); - - // memoryInterface->init(phase); - // sst does not understand what is a functional request. queue this as a - // regular memory packet. - - // SST does not understand what is a functional access in gem5 since SST - // only allows functional accesses at init time. Since it - // has all the stored in it's memory, any functional access made to SST has - // to be correctly handled. The idea here is to convert this timing access - // into a timing access and keep the SST memory consistent. - std::cout << "handleRecvFunc was called! Need to do something here!" << std::endl; - - gem5::Addr addr = pkt->getAddr(); - uint8_t* ptr = pkt->getPtr(); - uint64_t size = pkt->getSize(); - - // Create a new request to handle this request immediately. - SST::Interfaces::StandardMem::Request* request = nullptr; - - // we need a minimal translator here which does reads and writes. Any other - // command type is unexpected and the program should crash immediately. 
- switch((gem5::MemCmd::Command)pkt->cmd.toInt()) { - case gem5::MemCmd::WriteReq: { - std::vector data(ptr, ptr+size); - request = new SST::Interfaces::StandardMem::Write( - addr, data.size(), data); - break; - } - case gem5::MemCmd::ReadReq: { - request = new SST::Interfaces::StandardMem::Read(addr, size); - break; - } - default: - panic( - "handleRecvFunctional: Unable to convert gem5 packet: %s\n", - pkt->cmd.toString() - ); - } - if(pkt->req->isUncacheable()) { - request->setFlag( - SST::Interfaces::StandardMem::Request::Flag::F_NONCACHEABLE); - } - memoryInterface->send(request); - // memoryInterface->sendUntimedData(request); - - // memoryInterface->init(phase); - // sst does not understand what is a functional request. queue this as a - // regular memory packet. - } bool diff --git a/ext/sst/sst_responder_subcomponent.hh b/ext/sst/sst_responder_subcomponent.hh index cbf098aefe..ed9f09d6b8 100644 --- a/ext/sst/sst_responder_subcomponent.hh +++ b/ext/sst/sst_responder_subcomponent.hh @@ -38,7 +38,6 @@ #include #include #include -#include #include #include @@ -58,13 +57,11 @@ class SSTResponderSubComponent: public SST::SubComponent gem5::OutgoingRequestBridge* responseReceiver; gem5::SSTResponderInterface* sstResponder; - SST::Interfaces::StandardMem* memoryInterface; SST::Interfaces::StandardMem* memoryInterface; SST::TimeConverter* timeConverter; SST::Output* output; std::queue responseQueue; - std::vector initRequests; std::vector initRequests; std::string gem5SimObjectName; @@ -80,7 +77,6 @@ class SSTResponderSubComponent: public SST::SubComponent void setResponseReceiver(gem5::OutgoingRequestBridge* gem5_bridge); void portEventHandler(SST::Interfaces::StandardMem::Request* request); - void portEventHandler(SST::Interfaces::StandardMem::Request* request); bool blocked(); void setup(); @@ -88,22 +84,18 @@ class SSTResponderSubComponent: public SST::SubComponent // return true if the SimObject could be found bool findCorrespondingSimObject(gem5::Root* 
gem5_root); - bool handleTimingReq(SST::Interfaces::StandardMem::Request* request); bool handleTimingReq(SST::Interfaces::StandardMem::Request* request); void handleRecvRespRetry(); void handleRecvFunctional(gem5::PacketPtr pkt); void handleSwapReqResponse(SST::Interfaces::StandardMem::Request* request); - void handleSwapReqResponse(SST::Interfaces::StandardMem::Request* request); TPacketMap sstRequestIdToPacketMap; public: // register the component to SST SST_ELI_REGISTER_SUBCOMPONENT_API(SSTResponderSubComponent); - SST_ELI_REGISTER_SUBCOMPONENT( SST_ELI_REGISTER_SUBCOMPONENT( SSTResponderSubComponent, "gem5", // SST will look for libgem5.so or libgem5.dylib - "gem5", // SST will look for libgem5.so or libgem5.dylib "gem5Bridge", SST_ELI_ELEMENT_VERSION(1, 0, 0), "Initialize gem5 and link SST's ports to gem5's ports", @@ -113,7 +105,6 @@ class SSTResponderSubComponent: public SST::SubComponent SST_ELI_DOCUMENT_SUBCOMPONENT_SLOTS( {"memory", "Interface to the memory subsystem", \ "SST::Interfaces::StandardMem"} - "SST::Interfaces::StandardMem"} ) SST_ELI_DOCUMENT_PORTS( diff --git a/ext/sst/translator.hh b/ext/sst/translator.hh index 579a9f1899..bf6a168d9a 100644 --- a/ext/sst/translator.hh +++ b/ext/sst/translator.hh @@ -27,21 +27,18 @@ #ifndef __TRANSLATOR_H__ #define __TRANSLATOR_H__ -#include #include #include #include #include #include -typedef std::unordered_map TPacketMap; namespace Translator { -inline SST::Interfaces::StandardMem::Request* inline SST::Interfaces::StandardMem::Request* gem5RequestToSSTRequest(gem5::PacketPtr pkt, TPacketMap& sst_request_id_to_packet_map) @@ -64,35 +61,6 @@ gem5RequestToSSTRequest(gem5::PacketPtr pkt, CustomResp, InvNotify - }; - // SST's standard memory class has visitor classes for all the different - // types of memory commands. Request class now does not have a command - // variable. Instead for different types of request, we now need to - // dynamically cast the class object. 
I'm using an extra variable to map - // the type of command for SST. - int sst_command_type = -1; - // StandardMem only has one cache flush class with an option to flush or - // flush and invalidate an address. By default, this is set to true so that - // it corresponds to ge,::MemCmd::InvalidateReq - bool flush_addr_flag = true; - // Listing all the different SST Memory commands. - enum sst_standard_mem_commands - { - Read, - ReadResp, - Write, - WriteResp, - FlushAddr, - FlushResp, - ReadLock, - WriteUnlock, - LoadLink, - StoreConditional, - MoveData, - CustomReq, - CustomResp, - InvNotify - }; // SST's standard memory class has visitor classes for all the different // types of memory commands. Request class now does not have a command @@ -112,30 +80,21 @@ gem5RequestToSSTRequest(gem5::PacketPtr pkt, case gem5::MemCmd::ReadExReq: case gem5::MemCmd::ReadCleanReq: case gem5::MemCmd::ReadSharedReq: - case gem5::MemCmd::ReadCleanReq: - case gem5::MemCmd::ReadSharedReq: case gem5::MemCmd::ReadReq: case gem5::MemCmd::SwapReq: - sst_command_type = Read; sst_command_type = Read; break; case gem5::MemCmd::StoreCondReq: case gem5::MemCmd::WritebackDirty: case gem5::MemCmd::WritebackClean: - case gem5::MemCmd::WritebackDirty: - case gem5::MemCmd::WritebackClean: case gem5::MemCmd::WriteReq: - sst_command_type = Write; sst_command_type = Write; break; case gem5::MemCmd::CleanInvalidReq: case gem5::MemCmd::InvalidateReq: - sst_command_type = FlushAddr; sst_command_type = FlushAddr; break; case gem5::MemCmd::CleanSharedReq: - sst_command_type = FlushAddr; - flush_addr_flag = false; sst_command_type = FlushAddr; flush_addr_flag = false; break; @@ -143,7 +102,6 @@ gem5RequestToSSTRequest(gem5::PacketPtr pkt, panic("Unable to convert gem5 packet: %s\n", pkt->cmd.toString()); } - SST::Interfaces::StandardMem::Addr addr = pkt->getAddr(); SST::Interfaces::StandardMem::Addr addr = pkt->getAddr(); auto data_size = pkt->getSize(); std::vector data; @@ -153,40 +111,6 @@ 
gem5RequestToSSTRequest(gem5::PacketPtr pkt, uint8_t* data_ptr = pkt->getPtr(); data = std::vector(data_ptr, data_ptr + data_size); - } - // Now convert a sst StandardMem request. - SST::Interfaces::StandardMem::Request* request = nullptr; - // find the corresponding memory command type. - switch(sst_command_type) { - case Read: - request = new SST::Interfaces::StandardMem::Read(addr, data_size); - break; - case Write: - request = - new SST::Interfaces::StandardMem::Write(addr, data_size, data); - break; - case FlushAddr: { - // StandardMem::FlushAddr has a invoking variable called `depth` - // which defines the number of cache levels to invalidate. Ideally - // this has to be input from the SST config, however in - // implementation I'm hardcoding this value to 2. - int cache_depth = 2; - request = - new SST::Interfaces::StandardMem::FlushAddr( - addr, data_size, flush_addr_flag, cache_depth); - break; - } - default: - panic("Unable to translate command %d to Request class!", - sst_command_type); - } - std::vector data; - // Need to make sure that the command type is a Write to retrive the data - // data_ptr. - if (sst_command_type == Write) { - uint8_t* data_ptr = pkt->getPtr(); - data = std::vector(data_ptr, data_ptr + data_size); - } // Now convert a sst StandardMem request. SST::Interfaces::StandardMem::Request* request = nullptr; @@ -220,45 +144,27 @@ gem5RequestToSSTRequest(gem5::PacketPtr pkt, || pkt->req->isLockedRMW()) { // F_LOCKED is deprecated. Therefore I'm skipping this flag for the // StandardMem request. - } else if ((gem5::MemCmd::Command)pkt->cmd.toInt() == - // F_LOCKED is deprecated. Therefore I'm skipping this flag for the - // StandardMem request. } else if ((gem5::MemCmd::Command)pkt->cmd.toInt() == gem5::MemCmd::StoreCondReq) { // F_LLSC is deprecated. Therefore I'm skipping this flag for the // StandardMem request. - // F_LLSC is deprecated. Therefore I'm skipping this flag for the - // StandardMem request. 
} if (pkt->req->isUncacheable()) { request->setFlag( SST::Interfaces::StandardMem::Request::Flag::F_NONCACHEABLE); - request->setFlag( - SST::Interfaces::StandardMem::Request::Flag::F_NONCACHEABLE); } if (pkt->needsResponse()) sst_request_id_to_packet_map[request->getID()] = pkt; - - // if(gem5::curTick() > 340330000000) - // std::cout << request->getString() << std::endl; - sst_request_id_to_packet_map[request->getID()] = pkt; - - // if(gem5::curTick() > 340330000000) - // std::cout << request->getString() << std::endl; + return request; } inline void inplaceSSTRequestToGem5PacketPtr(gem5::PacketPtr pkt, SST::Interfaces::StandardMem::Request* request) - SST::Interfaces::StandardMem::Request* request) { - // if(gem5::curTick() > 340330000000) - // std::cout << request->getString() << std::endl; - // if(gem5::curTick() > 340330000000) - // std::cout << request->getString() << std::endl; pkt->makeResponse(); // Resolve the success of Store Conditionals @@ -278,18 +184,6 @@ inplaceSSTRequestToGem5PacketPtr(gem5::PacketPtr pkt, ); } } - // If there is data in the request, send it back. Only ReadResp requests - // have data associated with it. Other packets does not need to be casted. - if (!pkt->isWrite()) { - // Need to verify whether the packet is a ReadResp, otherwise the - // program will try to incorrectly cast the request object. 
- if (SST::Interfaces::StandardMem::ReadResp* test = - dynamic_cast(request)) { - pkt->setData(dynamic_cast( - request)->data.data() - ); - } - } // Clear out bus delay notifications pkt->headerDelay = pkt->payloadDelay = 0; diff --git a/src/sst/outgoing_request_bridge.cc b/src/sst/outgoing_request_bridge.cc index 74426d778c..d1bbdc6d5d 100644 --- a/src/sst/outgoing_request_bridge.cc +++ b/src/sst/outgoing_request_bridge.cc @@ -117,16 +117,10 @@ OutgoingRequestBridge::getInitPhaseStatus() { void OutgoingRequestBridge::handleRecvFunctional(PacketPtr pkt) { - // This should not receive any functional accesses - // gem5::MemCmd::Command pktCmd = (gem5::MemCmd::Command)pkt->cmd.toInt(); - // std::cout << "Recv Functional : 0x" << std::hex << pkt->getAddr() << - // std::dec << " " << pktCmd << " " << gem5::MemCmd::WriteReq << " " << - // getInitPhaseStatus() << std::endl; // Check at which stage are we at. If we are at INIT phase, then queue all // these packets. if (!getInitPhaseStatus()) { - // sstResponder->recvAtomic(pkt); uint8_t* ptr = pkt->getPtr(); uint64_t size = pkt->getSize(); std::vector data(ptr, ptr+size); @@ -139,9 +133,12 @@ OutgoingRequestBridge::handleRecvFunctional(PacketPtr pkt) // These packets have to translated at runtime. We convert these // packets to timing as its data has to be stored correctly in SST // memory. Otherwise reads from the SST memory will fail. To reproduce - // this error, don not handle any functional accesses and the kernel + // this error, do not handle any functional accesses and the kernel // boot will fail while reading the correct partition from the vda - // device. + // device. this is a hacky solution to solve functional accesses in the + // gem5 sst bridge. there are instances where the vda device will not + // work correctly. to reproduce errors, use 8 O3 CPUs accessing the + // same SST memory across 16 or 32 instances of gem5. 
sstResponder->handleRecvFunctional(pkt); } } From b4d92cbcf33d1fc4da3129c457e6b2e179df8915 Mon Sep 17 00:00:00 2001 From: Kaustav Goswami Date: Tue, 21 Nov 2023 18:58:48 -0800 Subject: [PATCH 08/23] ext: corrected minor changes in the outgoing bridge This change fixes minor changes with the gem5's outgoing bridge, mostly in comments and gem5 style. Signed-off-by: Kaustav Goswami --- ext/sst/sst_responder_subcomponent.cc | 4 ++-- src/sst/outgoing_request_bridge.cc | 1 - src/sst/outgoing_request_bridge.hh | 4 ++-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/ext/sst/sst_responder_subcomponent.cc b/ext/sst/sst_responder_subcomponent.cc index 28c949690f..2a7a325fc8 100644 --- a/ext/sst/sst_responder_subcomponent.cc +++ b/ext/sst/sst_responder_subcomponent.cc @@ -251,8 +251,8 @@ SSTResponderSubComponent::handleRecvFunctional(gem5::PacketPtr pkt) // SST does not understand what is a functional access in gem5 since SST // only allows functional accesses at init time. Since it // has all the stored in it's memory, any functional access made to SST has - // to be correctly handled. The idea here is to convert this timing access - // into a timing access and keep the SST memory consistent. + // to be correctly handled. The idea here is to convert this functional + // access into a timing access and keep the SST memory consistent.
gem5::Addr addr = pkt->getAddr(); uint8_t* ptr = pkt->getPtr(); diff --git a/src/sst/outgoing_request_bridge.cc b/src/sst/outgoing_request_bridge.cc index d1bbdc6d5d..7bfcd34e26 100644 --- a/src/sst/outgoing_request_bridge.cc +++ b/src/sst/outgoing_request_bridge.cc @@ -147,7 +147,6 @@ Tick OutgoingRequestBridge:: OutgoingRequestPort::recvAtomic(PacketPtr pkt) { - // return 0; assert(false && "OutgoingRequestPort::recvAtomic not implemented"); return Tick(); } diff --git a/src/sst/outgoing_request_bridge.hh b/src/sst/outgoing_request_bridge.hh index 0a3e2e2821..daef2cc96f 100644 --- a/src/sst/outgoing_request_bridge.hh +++ b/src/sst/outgoing_request_bridge.hh @@ -118,9 +118,9 @@ class OutgoingRequestBridge: public SimObject void initPhaseComplete(bool value); // We read the value of the init_phase_bool using `getInitPhaseStatus` - // method. - + // method. This methids will be used later to swap memory ports. bool getInitPhaseStatus(); + // gem5 Component (from SST) will call this function to let set the // bridge's corresponding SSTResponderSubComponent (which implemented // SSTResponderInterface). I.e., this will connect this bridge to the From 4344c1c36eb25698fe4bf62d0f7e641217bd96e1 Mon Sep 17 00:00:00 2001 From: Kaustav Goswami Date: Tue, 21 Nov 2023 19:01:03 -0800 Subject: [PATCH 09/23] ext,configs: drops and adds numa-enabled stdlib boards This change drops all the boards and configs created previously to simulate disaggregated memory systems as those were not strictly adhering to the stdlib rules. This new change posts most of the previously written code to create a new abstract disaggregated memory board for both ARM and RISCV architectures, which minimizes code duplication and can be easily extended. Extension suggestions and tips are now commented throughout the python files. This is a proper directory structure to navigate these changes.
Signed-off-by: Kaustav Goswami --- .../boards/arm_dm_board.py | 90 ++- disaggregated_memory/boards/arm_gem5_board.py | 198 +++++++ disaggregated_memory/boards/arm_sst_board.py | 238 ++++++++ .../boards}/riscv_dm_board.py | 148 +++-- .../boards/riscv_gem5_board.py | 187 +++++++ .../boards/riscv_sst_board.py | 230 ++++---- .../boards}/x86_dm_board.py | 0 .../cachehierarchies}/dm_caches.py | 7 +- .../cachehierarchies/dm_caches_sst.py | 8 +- .../cachehierarchies}/mi_example_dm_caches.py | 0 .../configs/arm-gem5-numa-nodes.py | 141 +++++ .../configs/arm-sst-numa-nodes.py | 130 +++++ .../configs/riscv-gem5-numa-nodes.py | 100 ++-- .../configs/riscv-sst-numa-nodes.py | 89 ++- .../memories/external_remote_memory.py | 86 +++ .../memories}/remote_memory.py | 2 +- disaggregated_memory_setup/arm_sst_board.py | 220 -------- disaggregated_memory_setup/numa_config_sst.py | 131 ----- .../numa_config_sst_nodes.py | 172 ------ .../numa_config_sst_nodes_arm.py | 139 ----- .../numa_config_w_ruby_caches.py | 160 ------ disaggregated_memory_setup/numa_config_x86.py | 129 ----- disaggregated_memory_setup/riscv_sst_board.py | 514 ------------------ .../simulator_project.ipynb | 159 ------ disaggregated_memory_setup/traffic_gen_sst.py | 136 ----- 25 files changed, 1362 insertions(+), 2052 deletions(-) rename disaggregated_memory_setup/arm_dm_sst_board.py => disaggregated_memory/boards/arm_dm_board.py (71%) create mode 100644 disaggregated_memory/boards/arm_gem5_board.py create mode 100644 disaggregated_memory/boards/arm_sst_board.py rename {disaggregated_memory_setup => disaggregated_memory/boards}/riscv_dm_board.py (78%) create mode 100644 disaggregated_memory/boards/riscv_gem5_board.py rename disaggregated_memory_setup/test_board_sst.py => disaggregated_memory/boards/riscv_sst_board.py (50%) rename {disaggregated_memory_setup => disaggregated_memory/boards}/x86_dm_board.py (100%) rename {disaggregated_memory_setup => disaggregated_memory/cachehierarchies}/dm_caches.py (95%) rename 
disaggregated_memory_setup/dm_caches_arm.py => disaggregated_memory/cachehierarchies/dm_caches_sst.py (94%) rename {disaggregated_memory_setup => disaggregated_memory/cachehierarchies}/mi_example_dm_caches.py (100%) create mode 100644 disaggregated_memory/configs/arm-gem5-numa-nodes.py create mode 100644 disaggregated_memory/configs/arm-sst-numa-nodes.py rename disaggregated_memory_setup/numa_config_w_delay.py => disaggregated_memory/configs/riscv-gem5-numa-nodes.py (61%) rename disaggregated_memory_setup/numa_config.py => disaggregated_memory/configs/riscv-sst-numa-nodes.py (63%) create mode 100644 disaggregated_memory/memories/external_remote_memory.py rename {disaggregated_memory_setup => disaggregated_memory/memories}/remote_memory.py (99%) delete mode 100644 disaggregated_memory_setup/arm_sst_board.py delete mode 100644 disaggregated_memory_setup/numa_config_sst.py delete mode 100644 disaggregated_memory_setup/numa_config_sst_nodes.py delete mode 100644 disaggregated_memory_setup/numa_config_sst_nodes_arm.py delete mode 100644 disaggregated_memory_setup/numa_config_w_ruby_caches.py delete mode 100644 disaggregated_memory_setup/numa_config_x86.py delete mode 100644 disaggregated_memory_setup/riscv_sst_board.py delete mode 100644 disaggregated_memory_setup/simulator_project.ipynb delete mode 100644 disaggregated_memory_setup/traffic_gen_sst.py diff --git a/disaggregated_memory_setup/arm_dm_sst_board.py b/disaggregated_memory/boards/arm_dm_board.py similarity index 71% rename from disaggregated_memory_setup/arm_dm_sst_board.py rename to disaggregated_memory/boards/arm_dm_board.py index 5ec6b28d90..fb965d51b2 100644 --- a/disaggregated_memory_setup/arm_dm_sst_board.py +++ b/disaggregated_memory/boards/arm_dm_board.py @@ -25,6 +25,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
from m5.objects import ( + Port, AddrRange, VoltageDomain, SrcClockDomain, @@ -62,31 +63,89 @@ from typing import List, Sequence, Tuple -class ArmDMSSTBoard(ArmBoard): +class ArmAbstractDMBoard(ArmBoard): + """ + A high-level ARM board that can zNUMA-capable systems with a remote + memories. This board is extended from the ArmBoard from Gem5 standard + library. This board assumes that you will be booting Linux. This board can + be used to do disaggregated ARM system research while accelerating the + simulation using kvm. + + **Limitations** + * kvm is only supported in a gem5-only setup. + """ + __metaclass__ = ABCMeta def __init__( self, clk_freq: str, processor: AbstractProcessor, - memory: AbstractMemorySystem, + local_memory: AbstractMemorySystem, cache_hierarchy: AbstractCacheHierarchy, - remote_memory_range: AddrRange, + remote_memory_addr_range: AddrRange, platform: VExpress_GEM5_Base = VExpress_GEM5_Foundation(), release: ArmRelease = ArmDefaultRelease(), ) -> None: - - self._remote_memory_range = remote_memory_range + # The structure of this board is similar to the RISCV DM board. + self._localMemory = local_memory + # remote_memory can either be an interface or an external memory + # This abstract disaggregated memory does not know what this type of + # memory is. it only needs to know the address range for this memory. + # from this range, we'll figure out the size. + self._remoteMemoryAddrRange = remote_memory_addr_range super().__init__( clk_freq=clk_freq, processor=processor, - memory=memory, + memory=local_memory, cache_hierarchy=cache_hierarchy, platform=platform, release=release, ) + self.local_memory = local_memory + + @overrides(ArmBoard) + def get_memory(self) -> "AbstractMemory": + """Get the memory (RAM) connected to the board. + + :returns: The memory system. + """ + raise NotImplementedError + + def get_local_memory(self) -> "AbstractMemory": + """Get the memory (RAM) connected to the board. + :returns: The local memory system. 
+ """ + return self._localMemory + + def get_remote_memory(self) -> "AbstractMemory": + """Get the memory (RAM) connected to the board. + This has to be implemeted by the child class as we don't know if + this board is simulating Gem5 memory or some external simulator + memory. + :returns: The remote memory system. + """ + raise NotImplementedError + + def get_remote_memory_size(self) -> "str": + """Get the remote memory size to setup the NUMA nodes.""" + return self._remoteMemoryAddrRange.size() + + @overrides(ArmBoard) + def get_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]: + return self.get_local_memory().get_mem_ports() + + def get_remote_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]: + """Get the memory (RAM) ports connected to the board. + This has to be implemeted by the child class as we don't know if + this board is simulating Gem5 memory or some external simulator + memory. + :returns: A tuple of mem_ports. + """ + raise NotImplementedError def get_remote_memory_addr_range(self): + raise NotImplementedError return self._remote_memory_range @overrides(ArmBoard) @@ -136,8 +195,8 @@ def _setup_board(self) -> None: # Once the realview is setup, we can continue setting up the memory # ranges. ArmBoard's memory can only be setup once realview is # initialized. - memory = self.get_memory() - mem_size = memory.get_size() + local_memory = self.get_local_memory() + mem_size = local_memory.get_size() # The following code is taken from configs/example/arm/devices.py. It # sets up all the memory ranges for the board. @@ -156,16 +215,22 @@ def _setup_board(self) -> None: break if success: - memory.set_memory_range(self.mem_ranges) + local_memory.set_memory_range(self.mem_ranges) else: raise ValueError("Memory size too big for platform capabilities") - - self.mem_ranges.append(self.get_remote_memory_addr_range()) + # At the end of the local_memory, append the remote memory range. + self.mem_ranges.append(self._remoteMemoryAddrRange) # The PCI Devices. 
PCI devices can be added via the `_add_pci_device` # function. self._pci_devices = [] + # set remtoe memory in the child board + self._set_remote_memory_ranges() + + def _set_remote_memory_ranges(self): + raise NotImplementedError + @overrides(ArmSystem) def generateDeviceTree(self, state): # Generate a device tree root node for the system by creating the root @@ -194,7 +259,7 @@ def generateMemNode(numa_node_id, mem_range): # Add memory nodes for mem_range in self.mem_ranges: root.append(generateMemNode(0, mem_range)) - root.append(generateMemNode(1, self.get_remote_memory_addr_range())) + root.append(generateMemNode(1, self._remoteMemoryAddrRange)) for node in self.recurseDeviceTree(state): # Merge root nodes instead of adding them (for children @@ -216,5 +281,4 @@ def get_default_kernel_args(self) -> List[str]: "norandmaps", "root={root_value}", "rw", - f"mem={self.get_memory().get_size()}", ] diff --git a/disaggregated_memory/boards/arm_gem5_board.py b/disaggregated_memory/boards/arm_gem5_board.py new file mode 100644 index 0000000000..1323256f6f --- /dev/null +++ b/disaggregated_memory/boards/arm_gem5_board.py @@ -0,0 +1,198 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# import os +# import sys + +# # all the source files are one directory above. +# sys.path.append( +# os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir)) +# ) + +from m5.objects import ( + Port, + AddrRange, + VoltageDomain, + SrcClockDomain, + Terminal, + VncServer, + IOXBar, + BadAddr, + ArmSystem, +) + +from m5.objects.RealView import VExpress_GEM5_Base, VExpress_GEM5_Foundation +from m5.objects.ArmSystem import ArmRelease, ArmDefaultRelease +from m5.objects.ArmFsWorkload import ArmFsLinux + +from m5.util.fdthelper import ( + Fdt, + FdtNode, + FdtProperty, + FdtPropertyStrings, + FdtPropertyWords, + FdtState, +) + +import os +import m5 +from abc import ABCMeta + +from memories.remote_memory import RemoteChanneledMemory +from boards.arm_dm_board import ArmAbstractDMBoard + +from gem5.components.processors.abstract_processor import AbstractProcessor +from gem5.components.memory.abstract_memory_system import AbstractMemorySystem +from gem5.components.cachehierarchies.abstract_cache_hierarchy import ( + AbstractCacheHierarchy, +) +from gem5.utils.override import overrides + +from typing import List, Sequence, Tuple + + +class ArmGem5DMBoard(ArmAbstractDMBoard): + 
__metaclass__ = ABCMeta + + def __init__( + self, + clk_freq: str, + processor: AbstractProcessor, + local_memory: AbstractMemorySystem, + remote_memory: AbstractMemorySystem, + cache_hierarchy: AbstractCacheHierarchy, + remote_memory_addr_range: AddrRange = None, + platform: VExpress_GEM5_Base = VExpress_GEM5_Foundation(), + release: ArmRelease = ArmDefaultRelease(), + ) -> None: + self._localMemory = local_memory + self._remoteMemory = remote_memory + # If the remote_memory_addr_range is not provided, we'll assume that + # it starts at 0x80000000 + local_memory_size and ends at it's own size + if remote_memory_addr_range is None: + remote_memory_addr_range = AddrRange( + 0x80000000 + self._localMemory.get_size(), + size=remote_memory.get_size(), + ) + super().__init__( + clk_freq=clk_freq, + processor=processor, + local_memory=local_memory, + remote_memory_addr_range=remote_memory_addr_range, + cache_hierarchy=cache_hierarchy, + platform=platform, + release=release, + ) + self.local_memory = local_memory + self.remote_memory = remote_memory + + @overrides(ArmAbstractDMBoard) + def get_remote_memory(self) -> "AbstractMemory": + """Get the memory (RAM) connected to the board. + :returns: The remote memory system. + """ + return self._remoteMemory + + @overrides(ArmAbstractDMBoard) + def get_remote_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]: + return self.get_remote_memory().get_mem_ports() + + @overrides(ArmAbstractDMBoard) + def _set_remote_memory_ranges(self): + self.get_remote_memory().set_memory_range( + [self._remoteMemoryAddrRange] + ) + + @overrides(ArmAbstractDMBoard) + def get_default_kernel_args(self) -> List[str]: + + # The default kernel string is taken from the devices.py file. + return [ + "console=ttyAMA0", + "lpj=19988480", + "norandmaps", + "root={root_value}", + "rw", + "init=/root/gem5-init.sh", + ] + + @overrides(ArmAbstractDMBoard) + def _connect_things(self) -> None: + """Connects all the components to the board. 
+ + The order of this board is always: + + 1. Connect the memory. + 2. Connect the cache hierarchy. + 3. Connect the processor. + + Developers may build upon this assumption when creating components. + + Notes + ----- + + * The processor is incorporated after the cache hierarchy due to a bug + noted here: https://gem5.atlassian.net/browse/GEM5-1113. Until this + bug is fixed, this ordering must be maintained. + * Once this function is called `_connect_things_called` *must* be set + to `True`. + """ + + if self._connect_things_called: + raise Exception( + "The `_connect_things` function has already been called." + ) + + # Incorporate the memory into the motherboard. + self.get_local_memory().incorporate_memory(self) + self.get_remote_memory().incorporate_memory(self) + + # Incorporate the cache hierarchy for the motherboard. + if self.get_cache_hierarchy(): + self.get_cache_hierarchy().incorporate_cache(self) + # need to connect the remote links to the board. + if self.get_cache_hierarchy().is_ruby(): + fatal( + "remote memory is only supported in classic caches at " + + "the moment!") + if isinstance(self.get_remote_memory(), RemoteChanneledMemory): + for ports in self.get_remote_memory().remote_links: + self.get_cache_hierarchy().membus.mem_side_ports = \ + ports.cpu_side_ports + + # Incorporate the processor into the motherboard. 
+ self.get_processor().incorporate_processor(self) + + self._connect_things_called = True + + @overrides(ArmAbstractDMBoard) + def _post_instantiate(self): + """Called to set up anything needed after m5.instantiate""" + self.get_processor()._post_instantiate() + if self.get_cache_hierarchy(): + self.get_cache_hierarchy()._post_instantiate() + self.get_local_memory()._post_instantiate() + self.get_remote_memory()._post_instantiate() diff --git a/disaggregated_memory/boards/arm_sst_board.py b/disaggregated_memory/boards/arm_sst_board.py new file mode 100644 index 0000000000..5f30dc2d23 --- /dev/null +++ b/disaggregated_memory/boards/arm_sst_board.py @@ -0,0 +1,238 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# import os +# import sys + +# # all the source files are one directory above. +# sys.path.append( +# os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir)) +# ) + +from m5.objects import ( + Port, + AddrRange, + VoltageDomain, + SrcClockDomain, + Terminal, + VncServer, + IOXBar, + BadAddr, + ArmSystem, +) + +from m5.objects.RealView import VExpress_GEM5_Base, VExpress_GEM5_Foundation +from m5.objects.ArmSystem import ArmRelease, ArmDefaultRelease +from m5.objects.ArmFsWorkload import ArmFsLinux + +from m5.util.fdthelper import ( + Fdt, + FdtNode, + FdtProperty, + FdtPropertyStrings, + FdtPropertyWords, + FdtState, +) + +import os +import m5 +from abc import ABCMeta + +from memories.remote_memory import RemoteChanneledMemory +from boards.arm_dm_board import ArmAbstractDMBoard + +from gem5.components.processors.abstract_processor import AbstractProcessor +from gem5.components.memory.abstract_memory_system import AbstractMemorySystem +from gem5.components.cachehierarchies.abstract_cache_hierarchy import ( + AbstractCacheHierarchy, +) +from gem5.utils.override import overrides + +from typing import List, Sequence, Tuple + + +class ArmSstDMBoard(ArmAbstractDMBoard): + __metaclass__ = ABCMeta + + def __init__( + self, + clk_freq: str, + processor: AbstractProcessor, + local_memory: AbstractMemorySystem, + remote_memory: "ExternalRemoteMemoryInterface", + cache_hierarchy: AbstractCacheHierarchy, + platform: VExpress_GEM5_Base 
= VExpress_GEM5_Foundation(), + release: ArmRelease = ArmDefaultRelease(), + ) -> None: + self._localMemory = local_memory + # Since the remote memory is defined in SST's side, we only need the + # size of this memory while setting up stuff from Gem5's side. + self._remoteMemory = remote_memory + # The remote memory is either set up with a size or an address range. + # We need to determine if the address range is set. If not, then we + # need to find the starting and ending of the external memory + # range. + if not self._remoteMemory.get_set_using_addr_ranges(): + # Address ranges were not set, but the system knows the size + # If the remote_memory_addr_range is not provided, we'll assume + # that it starts at 0x80000000 + local_memory_size and ends at its + # own size + self._remoteMemory.remote_memory.physical_address_ranges = [ + AddrRange( + 0x80000000 + self._localMemory.get_size(), + size=remote_memory.get_size(), + ) + ] + # We need a size as a string to set up this memory. + self._remoteMemorySize = self._remoteMemory.get_size() + super().__init__( + clk_freq=clk_freq, + processor=processor, + local_memory=local_memory, + remote_memory_addr_range=self._remoteMemory.remote_memory.physical_address_ranges[ + 0 + ], + cache_hierarchy=cache_hierarchy, + platform=platform, + release=release, + ) + self.local_memory = local_memory + self.remote_memory = self._remoteMemory.remote_memory + + @overrides(ArmAbstractDMBoard) + def get_remote_memory(self) -> "AbstractMemory": + """Get the memory (RAM) connected to the board. + :returns: The remote memory system.
+ """ + return self._remoteMemory + + @overrides(ArmAbstractDMBoard) + def get_remote_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]: + return [ + ( + self.get_remote_memory().physical_address_ranges, + self.get_remote_memory().port, + ) + ] + + @overrides(ArmAbstractDMBoard) + def _set_remote_memory_ranges(self): + pass + # self.get_remote_memory().set_memory_range( + # [self._remoteMemoryAddrRange] + # ) + + @overrides(ArmAbstractDMBoard) + def get_default_kernel_args(self) -> List[str]: + + # The default kernel string is taken from the devices.py file. + return [ + "console=ttyAMA0", + "lpj=19988480", + "norandmaps", + "root={root_value}", + "rw", + "init=/root/gem5-init.sh", + ] + + @overrides(ArmAbstractDMBoard) + def _connect_things(self) -> None: + """Connects all the components to the board. + + The order of this board is always: + + 1. Connect the memory. + 2. Connect the cache hierarchy. + 3. Connect the processor. + + Developers may build upon this assumption when creating components. + + Notes + ----- + + * The processor is incorporated after the cache hierarchy due to a bug + noted here: https://gem5.atlassian.net/browse/GEM5-1113. Until this + bug is fixed, this ordering must be maintained. + * Once this function is called `_connect_things_called` *must* be set + to `True`. + """ + + if self._connect_things_called: + raise Exception( + "The `_connect_things` function has already been called." + ) + + # Incorporate the memory into the motherboard. + self.get_local_memory().incorporate_memory(self) + # we need to find whether there is any external latency. if yes, then + # add xbar to add this latency. 
+ + if self.get_remote_memory().is_xbar_required(): + self.remote_link = NoncoherentXBar( + frontend_latency=0, + forward_latency=0, + response_latency=self.get_remote_memory()._remote_memory_latency, + width=64, + ) + # connect the remote memory port to the remote link + self.get_remote_memory().remote_memory.port = ( + self.remote_link.mem_side_ports + ) + # The remote link is then connected to the membus + self.get_cache_hierarchy().membus.mem_side_ports = ( + self.remote_link.cpu_side_ports + ) + else: + # Connect the external memory directly to the motherboard. + self.get_remote_memory().remote_memory.port = ( + self.get_cache_hierarchy().membus.mem_side_ports + ) + + # Incorporate the cache hierarchy for the motherboard. + if self.get_cache_hierarchy(): + self.get_cache_hierarchy().incorporate_cache(self) + # need to connect the remote links to the board. + if self.get_cache_hierarchy().is_ruby(): + fatal( + "remote memory is only supported in classic caches at " + + "the moment!") + if isinstance(self.get_remote_memory(), RemoteChanneledMemory): + for ports in self.get_remote_memory().remote_links: + self.get_cache_hierarchy().membus.mem_side_ports = \ + ports.cpu_side_ports + + # Incorporate the processor into the motherboard. 
+ self.get_processor().incorporate_processor(self) + + self._connect_things_called = True + + @overrides(ArmAbstractDMBoard) + def _post_instantiate(self): + """Called to set up anything needed after m5.instantiate""" + self.get_processor()._post_instantiate() + if self.get_cache_hierarchy(): + self.get_cache_hierarchy()._post_instantiate() + self.get_local_memory()._post_instantiate() diff --git a/disaggregated_memory_setup/riscv_dm_board.py b/disaggregated_memory/boards/riscv_dm_board.py similarity index 78% rename from disaggregated_memory_setup/riscv_dm_board.py rename to disaggregated_memory/boards/riscv_dm_board.py index 1a7acb909c..f9660748e8 100644 --- a/disaggregated_memory_setup/riscv_dm_board.py +++ b/disaggregated_memory/boards/riscv_dm_board.py @@ -57,14 +57,17 @@ ) -class RiscvDMBoard(RiscvBoard): +class RiscvAbstractDMBoard(RiscvBoard): """ - A board capable of full system simulation for RISC-V - At a high-level, this is based on the HiFive Unmatched board from SiFive. - This board assumes that you will be booting Linux. + A high-level RISCV board that can simulate zNUMA-capable systems with remote + memories. This board is extended from the RiscvBoard from the gem5 standard + library. At a high-level, this is based on the HiFive Unmatched board from + SiFive. This board assumes that you will be booting Linux. **Limitations** - * Only works with classic caches + * There is only one Plic and Clint controller supported by this board, + which makes this board only capable of simulating zNUMA nodes and not + full-fledged NUMA nodes.
""" def __init__( @@ -72,11 +75,15 @@ def __init__( clk_freq: str, processor: AbstractProcessor, local_memory: AbstractMemorySystem, - remote_memory: AbstractMemorySystem, + remote_memory_addr_range: AddrRange, cache_hierarchy: AbstractCacheHierarchy, ) -> None: self._localMemory = local_memory - self._remoteMemory = remote_memory + # remote_memory can either be an interface or an external memory + # This abstract disaggregated memory does not know what this type of + # memory is. it only needs to know the address range for this memory. + # from this range, we'll figure out the size. + self._remoteMemoryAddrRange = remote_memory_addr_range super().__init__( clk_freq=clk_freq, processor=processor, @@ -84,7 +91,6 @@ def __init__( cache_hierarchy=cache_hierarchy, ) self.local_memory = local_memory - self.remote_memory = remote_memory if processor.get_isa() != ISA.RISCV: raise Exception( @@ -109,38 +115,51 @@ def get_local_memory(self) -> "AbstractMemory": def get_remote_memory(self) -> "AbstractMemory": """Get the memory (RAM) connected to the board. + This has to be implemeted by the child class as we don't know if + this board is simulating Gem5 memory or some external simulator + memory. :returns: The remote memory system. """ - return self._remoteMemory + raise NotImplementedError + + def get_remote_memory_size(self) -> "str": + """Get the remote memory size to setup the NUMA nodes.""" + return self._remoteMemoryAddrRange.size() @overrides(AbstractSystemBoard) def get_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]: return self.get_local_memory().get_mem_ports() def get_remote_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]: - return self.get_remote_memory().get_mem_ports() + """Get the memory (RAM) ports connected to the board. + This has to be implemeted by the child class as we don't know if + this board is simulating Gem5 memory or some external simulator + memory. + :returns: A tuple of mem_ports. 
+ """ + raise NotImplementedError @overrides(AbstractSystemBoard) def _setup_memory_ranges(self): - # the memory has to be setup for both the memory ranges. there is one # local memory range, close to the host machine and the other range is # pure memory, far from the host. local_memory = self.get_local_memory() - remote_memory = self.get_remote_memory() + # remote_memory = self.get_remote_memory_size() local_mem_size = local_memory.get_size() - remote_mem_size = remote_memory.get_size() + remote_mem_size = self.get_remote_memory_size() + # local memory range will always start from 0x80000000. The remote + # memory can start and end anywhere as long as it is consistent + # with the dtb. self._local_mem_ranges = [ AddrRange(start=0x80000000, size=local_mem_size) ] - # The remote memory starts where the local memory ends. Therefore it - # has to be offset by the local memory's size. - self._remote_mem_ranges = [ - AddrRange(start=0x80000000 + local_mem_size, size=remote_mem_size) - ] + # The remote memory starts anywhere after the local memory ends. We + # rely on the user to start and end this range. + self._remote_mem_ranges = [self._remoteMemoryAddrRange] # using a _global_ memory range to keep a track of all the memory # ranges. This is used to generate the dtb for this machine @@ -148,9 +167,18 @@ def _setup_memory_ranges(self): self._global_mem_ranges.append(self._local_mem_ranges[0]) self._global_mem_ranges.append(self._remote_mem_ranges[0]) - # setting the memory ranges for both of the memory ranges. - local_memory.set_memory_range(self._local_mem_ranges) - remote_memory.set_memory_range(self._remote_mem_ranges) + # setting the memory ranges for both of the memory ranges. we cannot + # incorporate the memory at using this abstract board. 
+ + self._incorporate_memory_range() + + def _incorporate_memory_range(self): + """ + The child board only can incorporate this memory range""" + + raise NotImplementedError( + "Cannot incorporte the memory using an Abstract-like board." + ) @overrides(RiscvBoard) def generate_device_tree(self, outdir: str) -> None: @@ -158,7 +186,6 @@ def generate_device_tree(self, outdir: str) -> None: Creates two files in the outdir: 'device.dtb' and 'device.dts' :param outdir: Directory to output the files """ - state = FdtState(addr_cells=2, size_cells=2, cpu_cells=1) root = FdtNode("/") root.append(state.addrCellsProperty()) @@ -401,54 +428,55 @@ def generate_device_tree(self, outdir: str) -> None: @overrides(KernelDiskWorkload) def get_default_kernel_args(self) -> List[str]: - # return ["console=ttyS0", "root={root_value}", "init=/root/gem5_init.sh", "rw"] + # return ["console=ttyS0", "root={root_value}", + # "init=/root/gem5_init.sh", "rw"] return ["console=ttyS0", "root={root_value}", "init=/bin/bash", "rw"] - @overrides(AbstractBoard) - def _connect_things(self) -> None: - """Connects all the components to the board. + # @overrides(AbstractBoard) + # def _connect_things(self) -> None: + # """Connects all the components to the board. - The order of this board is always: + # The order of this board is always: - 1. Connect the memory. - 2. Connect the cache hierarchy. - 3. Connect the processor. + # 1. Connect the memory. + # 2. Connect the cache hierarchy. + # 3. Connect the processor. - Developers may build upon this assumption when creating components. + # Developers may build upon this assumption when creating components. - Notes - ----- + # Notes + # ----- - * The processor is incorporated after the cache hierarchy due to a bug - noted here: https://gem5.atlassian.net/browse/GEM5-1113. Until this - bug is fixed, this ordering must be maintained. - * Once this function is called `_connect_things_called` *must* be set - to `True`. 
- """ + # * The processor is incorporated after the cache hierarchy due to a bug + # noted here: https://gem5.atlassian.net/browse/GEM5-1113. Until this + # bug is fixed, this ordering must be maintained. + # * Once this function is called `_connect_things_called` *must* be set + # to `True`. + # """ - if self._connect_things_called: - raise Exception( - "The `_connect_things` function has already been called." - ) + # if self._connect_things_called: + # raise Exception( + # "The `_connect_things` function has already been called." + # ) - # Incorporate the memory into the motherboard. - self.get_local_memory().incorporate_memory(self) - self.get_remote_memory().incorporate_memory(self) + # # Incorporate the memory into the motherboard. + # self.get_local_memory().incorporate_memory(self) + # self.get_remote_memory().incorporate_memory(self) - # Incorporate the cache hierarchy for the motherboard. - if self.get_cache_hierarchy(): - self.get_cache_hierarchy().incorporate_cache(self) + # # Incorporate the cache hierarchy for the motherboard. + # if self.get_cache_hierarchy(): + # self.get_cache_hierarchy().incorporate_cache(self) - # Incorporate the processor into the motherboard. - self.get_processor().incorporate_processor(self) + # # Incorporate the processor into the motherboard. 
+ # self.get_processor().incorporate_processor(self) - self._connect_things_called = True + # self._connect_things_called = True - @overrides(AbstractBoard) - def _post_instantiate(self): - """Called to set up anything needed after m5.instantiate""" - self.get_processor()._post_instantiate() - if self.get_cache_hierarchy(): - self.get_cache_hierarchy()._post_instantiate() - self.get_local_memory()._post_instantiate() - self.get_remote_memory()._post_instantiate() + # @overrides(AbstractBoard) + # def _post_instantiate(self): + # """Called to set up anything needed after m5.instantiate""" + # self.get_processor()._post_instantiate() + # if self.get_cache_hierarchy(): + # self.get_cache_hierarchy()._post_instantiate() + # self.get_local_memory()._post_instantiate() + # self.get_remote_memory()._post_instantiate() diff --git a/disaggregated_memory/boards/riscv_gem5_board.py b/disaggregated_memory/boards/riscv_gem5_board.py new file mode 100644 index 0000000000..d304d00a5c --- /dev/null +++ b/disaggregated_memory/boards/riscv_gem5_board.py @@ -0,0 +1,187 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import os + +from typing import List, Optional, Sequence, Tuple + +from memories.remote_memory import RemoteChanneledMemory +from boards.riscv_dm_board import RiscvAbstractDMBoard + +from gem5.components.boards.abstract_board import AbstractBoard +from gem5.utils.override import overrides +from gem5.resources.resource import AbstractResource +from gem5.components.boards.kernel_disk_workload import KernelDiskWorkload +from gem5.components.boards.abstract_system_board import AbstractSystemBoard +from gem5.components.processors.abstract_processor import AbstractProcessor +from gem5.components.memory.abstract_memory_system import AbstractMemorySystem +from gem5.components.cachehierarchies.abstract_cache_hierarchy import ( + AbstractCacheHierarchy, +) + +from gem5.isas import ISA + +import m5 + +from m5.objects import AddrRange, HiFive, Frequency, Port + +from m5.util.fdthelper import ( + Fdt, + FdtNode, + FdtProperty, + FdtPropertyStrings, + FdtPropertyWords, + FdtState, +) + + +class RiscvGem5DMBoard(RiscvAbstractDMBoard): + """ + A board capable of full system simulation for RISC-V + At a high-level, this is based on the HiFive Unmatched board from SiFive. 
+ This board assumes that you will be booting Linux. + + **Limitations** + * Only works with classic caches + """ + + def __init__( + self, + clk_freq: str, + processor: AbstractProcessor, + local_memory: AbstractMemorySystem, + remote_memory: AbstractMemorySystem, + cache_hierarchy: AbstractCacheHierarchy, + remote_memory_addr_range: AddrRange = None, + ) -> None: + self._localMemory = local_memory + self._remoteMemory = remote_memory + # If the remote_memory_addr_range is not provided, we'll assume that + # it starts at 0x80000000 + local_memory_size and ends at it's own size + if remote_memory_addr_range is None: + remote_memory_addr_range = AddrRange( + 0x80000000 + self._localMemory.get_size(), + size=remote_memory.get_size(), + ) + super().__init__( + clk_freq=clk_freq, + processor=processor, + local_memory=local_memory, + remote_memory_addr_range=remote_memory_addr_range, + cache_hierarchy=cache_hierarchy, + ) + self.local_memory = local_memory + self.remote_memory = remote_memory + + if processor.get_isa() != ISA.RISCV: + raise Exception( + "The RISCVBoard requires a processor using the" + "RISCV ISA. Current processor ISA: " + f"'{processor.get_isa().name}'." + ) + + def get_remote_memory(self) -> "AbstractMemory": + """Get the memory (RAM) connected to the board. + :returns: The remote memory system. + """ + return self._remoteMemory + + def get_remote_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]: + return self.get_remote_memory().get_mem_ports() + + @overrides(RiscvAbstractDMBoard) + def _incorporate_memory_range(self): + # If the memory exists in gem5, then, we need to incorporate this + # memory range. 
+ self.get_local_memory().set_memory_range(self._local_mem_ranges) + self.get_remote_memory().set_memory_range(self._remote_mem_ranges) + + @overrides(RiscvAbstractDMBoard) + def get_default_kernel_args(self) -> List[str]: + return [ + "console=ttyS0", + "root={root_value}", + "init=/root/gem5-init.sh", + "rw", + ] + + @overrides(AbstractBoard) + def _connect_things(self) -> None: + """Connects all the components to the board. + + The order of this board is always: + + 1. Connect the memory. + 2. Connect the cache hierarchy. + 3. Connect the processor. + + Developers may build upon this assumption when creating components. + + Notes + ----- + + * The processor is incorporated after the cache hierarchy due to a bug + noted here: https://gem5.atlassian.net/browse/GEM5-1113. Until this + bug is fixed, this ordering must be maintained. + * Once this function is called `_connect_things_called` *must* be set + to `True`. + """ + + if self._connect_things_called: + raise Exception( + "The `_connect_things` function has already been called." + ) + + # Incorporate the memory into the motherboard. + self.get_local_memory().incorporate_memory(self) + self.get_remote_memory().incorporate_memory(self) + + # Incorporate the cache hierarchy for the motherboard. + if self.get_cache_hierarchy(): + self.get_cache_hierarchy().incorporate_cache(self) + # need to connect the remote links to the board. + if self.get_cache_hierarchy().is_ruby(): + fatal( + "remote memory is only supported in classic caches at " + + "the moment!") + if isinstance(self.get_remote_memory(), RemoteChanneledMemory): + for ports in self.get_remote_memory().remote_links: + self.get_cache_hierarchy().membus.mem_side_ports = \ + ports.cpu_side_ports + + # Incorporate the processor into the motherboard. 
+ self.get_processor().incorporate_processor(self) + + self._connect_things_called = True + + @overrides(AbstractBoard) + def _post_instantiate(self): + """Called to set up anything needed after m5.instantiate""" + self.get_processor()._post_instantiate() + if self.get_cache_hierarchy(): + self.get_cache_hierarchy()._post_instantiate() + self.get_local_memory()._post_instantiate() + self.get_remote_memory()._post_instantiate() diff --git a/disaggregated_memory_setup/test_board_sst.py b/disaggregated_memory/boards/riscv_sst_board.py similarity index 50% rename from disaggregated_memory_setup/test_board_sst.py rename to disaggregated_memory/boards/riscv_sst_board.py index 459adfface..db3b6edae3 100644 --- a/disaggregated_memory_setup/test_board_sst.py +++ b/disaggregated_memory/boards/riscv_sst_board.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023 The Regents of the University of California +# Copyright (c) 2023 The Regents of the University of California # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -24,7 +24,14 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-from m5.objects import Port, AddrRange, OutgoingRequestBridge +import os + +from typing import List, Optional, Sequence, Tuple + +from boards.riscv_dm_board import RiscvAbstractDMBoard + +from gem5.components.boards.abstract_board import AbstractBoard +from gem5.utils.override import overrides from gem5.resources.resource import AbstractResource from gem5.components.boards.kernel_disk_workload import KernelDiskWorkload from gem5.components.boards.abstract_system_board import AbstractSystemBoard @@ -33,39 +40,88 @@ from gem5.components.cachehierarchies.abstract_cache_hierarchy import ( AbstractCacheHierarchy, ) -from gem5.components.processors.abstract_generator import AbstractGenerator -from gem5.components.boards.test_board import TestBoard -from typing import List, Optional, Sequence, Tuple -from gem5.utils.override import overrides +from gem5.isas import ISA +import m5 -class TestBoardForSST(TestBoard): - """This board implements a test board for SST/External Memory devices. It - is assumed that the device has two memories. +from m5.objects import ( + AddrRange, + HiFive, + Frequency, + Port, + OutgoingRequestBridge, + NoncoherentXBar, +) + +from m5.util.fdthelper import ( + Fdt, + FdtNode, + FdtProperty, + FdtPropertyStrings, + FdtPropertyWords, + FdtState, +) + + +class RiscvSstDMBoard(RiscvAbstractDMBoard): + """ + A board capable of full system simulation for multiple RISC-V nodes. + At a high-level, this is based on the HiFive Unmatched board from SiFive. + This board assumes that you will be booting Linux. 
+ + **Limitations** + * Only works with classic caches """ def __init__( self, clk_freq: str, - generator: AbstractGenerator, - remote_memory_size: str, - memory: Optional[AbstractMemorySystem], - cache_hierarchy: Optional[AbstractCacheHierarchy], - ): - self._localMemory = None - if memory is not None: - self._localMemory = memory - self._remoteMemory = OutgoingRequestBridge() - self._remoteMemorySize = remote_memory_size + processor: AbstractProcessor, + local_memory: AbstractMemorySystem, + remote_memory: "ExternalRemoteMemoryInterface", + cache_hierarchy: AbstractCacheHierarchy, + ) -> None: + self._localMemory = local_memory + # Since the remote memory is defined in SST's side, we only need the + # size of this memory while setting up stuff from Gem5's side. + self._remoteMemory = remote_memory + # The remote memory is either set up with a size or an address range. + # We need to determine if the address range is set. If not, then we + # need to find the starting and ending of the external memory + # range. + if not self._remoteMemory.get_set_using_addr_ranges(): + # Address ranges were not set, but the system knows the size + # If the remote_memory_addr_range is not provided, we'll assume + # that it starts at 0x80000000 + local_memory_size and ends at its + # own size + self._remoteMemory.remote_memory.physical_address_ranges = [ + AddrRange( + 0x80000000 + self._localMemory.get_size(), + size=remote_memory.get_size(), + ) + ] + # We need a size as a string to set up this memory.
+ self._remoteMemorySize = self._remoteMemory.get_size() + super().__init__( clk_freq=clk_freq, - generator=generator, - memory=self._localMemory, + processor=processor, + local_memory=local_memory, + remote_memory_addr_range=self._remoteMemory.remote_memory.physical_address_ranges[ + 0 + ], cache_hierarchy=cache_hierarchy, ) - self.local_memory = self._localMemory - self.remote_memory = self._remoteMemory + self.local_memory = local_memory + self.remote_memory = self._remoteMemory.remote_memory + + if processor.get_isa() != ISA.RISCV: + raise Exception( + "The RISCVBoard requires a processor using the" + "RISCV ISA. Current processor ISA: " + f"'{processor.get_isa().name}'." + ) @overrides(AbstractSystemBoard) def get_memory(self) -> "AbstractMemory": @@ -93,7 +149,6 @@ def get_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]: return self.get_local_memory().get_mem_ports() def get_remote_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]: - return [ ( self.get_remote_memory().physical_address_ranges, @@ -101,61 +156,15 @@ def get_remote_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]: ) ] - @overrides(AbstractSystemBoard) - def _setup_memory_ranges(self): - # The local memory can be empty in this case. - local_memory = None - remote_memory = self.get_remote_mem() - # This is a string - remote_mem_size = self._remoteMemorySize - # using a _global_ memory range to keep a track of all the memory - # ranges. 
This is used to generate the dtb for this machine - start_addr_for_remote = 0x0 - self._global_mem_ranges = [] - if self.get_local_memory() is not None: - local_memory = self.get_local_memory() - self._global_mem_ranges.append( - AddrRange(start=0x0, size=local_memory.get_size()) - ) - start_addr_for_remote = local_memory.get_size() - local_memory.set_memory_range(self._global_mem_ranges[0]) - - self._global_mem_ranges.append( - AddrRange(start=start_addr_for_remote, size=remote_mem_size()) - ) - - remote_memory.physical_address_ranges = self._global_mem_ranges[-1] - - # the memory has to be setup for both the memory ranges. there is one - # local memory range, close to the host machine and the other range is - # pure memory, far from the host. - - # self._local_mem_ranges = [ - # AddrRange(start=0x80000000, size=local_mem_size) - # ] - - # The remote memory starts where the local memory ends. Therefore it - # has to be offset by the local memory's size. - # self._remote_mem_ranges = [ - # AddrRange(start=0x80000000 + local_mem_size, size=remote_mem_size) - # ] - - # keeping a hole in the mem ranges to simulate multiple nodes without - # using a translator simobject. - # remote_memory_start_addr = 0x80000000 + local_mem_size + self._instanceCount * 0x80000000 - # self._remote_mem_ranges = [ - # AddrRange(start=remote_memory_start_addr, size=remote_mem_size) - # ] - - # self._global_mem_ranges.append(self._local_mem_ranges[0]) - # self._global_mem_ranges.append(self._remote_mem_ranges[0]) + @overrides(RiscvAbstractDMBoard) + def _incorporate_memory_range(self): + self.get_local_memory().set_memory_range(self._local_mem_ranges) - # setting the memory ranges for both of the memory ranges. 
- # local_memory.set_memory_range(self._local_mem_ranges) - # remote_memory.physical_address_ranges = self._remote_mem_ranges - # remote_memory.set_memory_range(self._remote_mem_ranges) + @overrides(RiscvAbstractDMBoard) + def get_default_kernel_args(self) -> List[str]: + return ["console=ttyS0", "root={root_value}", "init=/bin/bash", "rw"] - @overrides(TestBoard) + @overrides(RiscvAbstractDMBoard) def _connect_things(self) -> None: """Connects all the components to the board. @@ -183,24 +192,30 @@ def _connect_things(self) -> None: ) # Incorporate the memory into the motherboard. - if self.get_local_memory() is not None: - self.get_local_memory().incorporate_memory(self) - - # # Add a NoncoherentXBar here - - # self.remote_link = NoncoherentXBar( - # frontend_latency = 0, - # forward_latency = 0, - # response_latency = 0, - # width = 64 - # ) - # self.get_remote_memory().port = self.remote_link.mem_side_ports - # self.get_cache_hierarchy().membus.mem_side_ports = self.remote_link.cpu_side_ports - - self.get_remote_memory().port = ( - self.get_cache_hierarchy().membus.mem_side_ports - ) - # self.get_remote_memory().incorporate_memory(self) + self.get_local_memory().incorporate_memory(self) + # we need to find whether there is any external latency. if yes, then + # add xbar to add this latency. + + if self.get_remote_memory().is_xbar_required(): + self.remote_link = NoncoherentXBar( + frontend_latency=0, + forward_latency=0, + response_latency=self.get_remote_memory()._remote_memory_latency, + width=64, + ) + # connect the remote memory port to the remote link + self.get_remote_memory().remote_memory.port = ( + self.remote_link.mem_side_ports + ) + # The remote link is then connected to the membus + self.get_cache_hierarchy().membus.mem_side_ports = ( + self.remote_link.cpu_side_ports + ) + else: + # Connect the external memory directly to the motherboard. 
+ self.get_remote_memory().remote_memory.port = ( + self.get_cache_hierarchy().membus.mem_side_ports + ) # Incorporate the cache hierarchy for the motherboard. if self.get_cache_hierarchy(): @@ -211,24 +226,19 @@ def _connect_things(self) -> None: self._connect_things_called = True - if not self.get_cache_hierarchy(): - # If we have no caches, then there must be a one-to-one - # connection between the generators and the memories. - assert len(self.get_processor().get_cores()) == 1 - # assert len(self.get_memory().get_mem_ports()) == 1 - self.get_processor().get_cores()[0].connect_dcache( - self.get_remote_memory().get_remote_mem_ports()[0][1] - ) + @overrides(RiscvAbstractDMBoard) + def get_default_kernel_args(self) -> List[str]: + return [ + "console=ttyS0", + "root={root_value}", + "init=/root/gem5-init.sh", + "rw", + ] - @overrides(TestBoard) + @overrides(AbstractBoard) def _post_instantiate(self): """Called to set up anything needed after m5.instantiate""" - print( - "__ranges__", self.get_remote_memory().physical_address_ranges[0] - ) self.get_processor()._post_instantiate() if self.get_cache_hierarchy(): self.get_cache_hierarchy()._post_instantiate() - if self.get_local_memory() is not None: - self.get_local_memory()._post_instantiate() - # self.get_remote_memory()._post_instantiate() + self.get_local_memory()._post_instantiate() diff --git a/disaggregated_memory_setup/x86_dm_board.py b/disaggregated_memory/boards/x86_dm_board.py similarity index 100% rename from disaggregated_memory_setup/x86_dm_board.py rename to disaggregated_memory/boards/x86_dm_board.py diff --git a/disaggregated_memory_setup/dm_caches.py b/disaggregated_memory/cachehierarchies/dm_caches.py similarity index 95% rename from disaggregated_memory_setup/dm_caches.py rename to disaggregated_memory/cachehierarchies/dm_caches.py index f69cd30d0a..c016d183a8 100644 --- a/disaggregated_memory_setup/dm_caches.py +++ b/disaggregated_memory/cachehierarchies/dm_caches.py @@ -38,7 +38,7 @@ from 
gem5.utils.override import overrides -class ClassicPL1PL2DMCache(PrivateL1PrivateL2CacheHierarchy): +class ClassicPrivateL1PrivateL2DMCache(PrivateL1PrivateL2CacheHierarchy): def __init__( self, l1d_size: str, @@ -67,9 +67,8 @@ def incorporate_cache(self, board: AbstractBoard) -> None: for cntr in board.get_local_memory().get_memory_controllers(): cntr.port = self.membus.mem_side_ports - # comment these lines for SST caches - # for cntr in board.get_remote_memory().get_memory_controllers(): - # cntr.port = self.membus.mem_side_ports + for cntr in board.get_remote_memory().get_memory_controllers(): + cntr.port = self.membus.mem_side_ports self.l1icaches = [ L1ICache(size=self._l1i_size) diff --git a/disaggregated_memory_setup/dm_caches_arm.py b/disaggregated_memory/cachehierarchies/dm_caches_sst.py similarity index 94% rename from disaggregated_memory_setup/dm_caches_arm.py rename to disaggregated_memory/cachehierarchies/dm_caches_sst.py index d732f7c357..9c7a99a909 100644 --- a/disaggregated_memory_setup/dm_caches_arm.py +++ b/disaggregated_memory/cachehierarchies/dm_caches_sst.py @@ -38,14 +38,16 @@ from gem5.utils.override import overrides -class ClassicPL1PL2DMCacheArm(PrivateL1PrivateL2CacheHierarchy): +class ClassicPrivateL1PrivateL2SstDMCache(PrivateL1PrivateL2CacheHierarchy): def __init__( self, l1d_size: str, l1i_size: str, l2_size: str, ) -> None: - """ + """We need a specific version of DM caches for the external memory, + which does not connect the remote memory controller ports directly. + :param l1d_size: The size of the L1 Data Cache (e.g., "32kB"). :type l1d_size: str :param l1i_size: The size of the L1 Instruction Cache (e.g., "32kB"). @@ -64,7 +66,7 @@ def incorporate_cache(self, board: AbstractBoard) -> None: # Set up the system port for functional access from the simulator. 
board.connect_system_port(self.membus.cpu_side_ports) - for cntr in board.get_memory().get_memory_controllers(): + for cntr in board.get_local_memory().get_memory_controllers(): cntr.port = self.membus.mem_side_ports self.l1icaches = [ diff --git a/disaggregated_memory_setup/mi_example_dm_caches.py b/disaggregated_memory/cachehierarchies/mi_example_dm_caches.py similarity index 100% rename from disaggregated_memory_setup/mi_example_dm_caches.py rename to disaggregated_memory/cachehierarchies/mi_example_dm_caches.py diff --git a/disaggregated_memory/configs/arm-gem5-numa-nodes.py b/disaggregated_memory/configs/arm-gem5-numa-nodes.py new file mode 100644 index 0000000000..dd1d9a0b1d --- /dev/null +++ b/disaggregated_memory/configs/arm-gem5-numa-nodes.py @@ -0,0 +1,141 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +This script shows an example of running a full system ARM Ubuntu boot +simulation using the gem5 library. This simulation boots Ubuntu 20.04 using +1 TIMING CPU cores and executes `STREAM`. The simulation ends when the +startup is completed successfully. +""" + +import os +import sys + +# all the source files are one directory above. +sys.path.append( + os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir)) +) + +import m5 +from m5.objects import Root + +from boards.arm_gem5_board import ArmGem5DMBoard +from cachehierarchies.dm_caches import ClassicPrivateL1PrivateL2DMCache +from memories.remote_memory import RemoteChanneledMemory +from gem5.utils.requires import requires +from gem5.components.memory.dram_interfaces.ddr4 import DDR4_2400_8x8 +from gem5.components.memory import DualChannelDDR4_2400 +from gem5.components.memory.multi_channel import * +from gem5.components.processors.simple_processor import SimpleProcessor +from gem5.components.processors.cpu_types import CPUTypes +from gem5.isas import ISA +from gem5.simulate.simulator import Simulator +from gem5.resources.workload import Workload +from gem5.resources.workload import * +from gem5.resources.resource import * + +# This runs a check to ensure the gem5 binary is compiled for ARM. + +requires(isa_required=ISA.ARM) + +# defining a new type of memory with latency added. 
This memory interface can +# be used as a remote memory interface to simulate disaggregated memory. +def RemoteDualChannelDDR4_2400( + size: Optional[str] = None, remote_offset_latency=300 +) -> AbstractMemorySystem: + """ + A dual channel memory system using DDR4_2400_8x8 based DIMM + """ + return RemoteChanneledMemory( + DDR4_2400_8x8, + 1, + 64, + size=size, + remote_offset_latency=remote_offset_latency, + ) + +# Here we setup the parameters of the l1 and l2 caches. +cache_hierarchy = ClassicPrivateL1PrivateL2DMCache( + l1d_size="32KiB", l1i_size="32KiB", l2_size="1MB" +) +# Memory: Dual Channel DDR4 2400 DRAM device. +local_memory = DualChannelDDR4_2400(size="1GiB") +# The remote memory can either be a simple Memory Interface, which is from a +# different memory range or it can be a Remote Memory Range, which has an +# inherent delay while performing reads and writes into that memory. For simple +# memory, use any MemInterfaces available in gem5 standard library. For remote +# memory, please refer to the `RemoteDualChannelDDR4_2400` method in this +# config script to extend any existing MemInterface class and add latency value +# to that memory. +remote_memory = RemoteDualChannelDDR4_2400( + size="1GB", remote_offset_latency=750 +) +# Here we setup the processor. We use a simple processor. +processor = SimpleProcessor(cpu_type=CPUTypes.ATOMIC, isa=ISA.ARM, num_cores=1) +# Here we setup the board which allows us to do Full-System ARM simulations.
+board = ArmGem5DMBoard( + clk_freq="3GHz", + processor=processor, + local_memory=local_memory, + remote_memory=remote_memory, + cache_hierarchy=cache_hierarchy, +) +cmd = [ + "mount -t sysfs - /sys;", + "mount -t proc - /proc;", + "numastat;", + "m5 dumpresetstats 0 ;", + "numactl --cpubind=0 --membind=0 -- " + + "/home/ubuntu/simple-vectorizable-microbenchmarks/stream/stream.hw " + + "1000000;", + "m5 dumpresetstats 0;", + "numactl --cpubind=0 --membind=0,1 -- " + + "/home/ubuntu/simple-vectorizable-microbenchmarks/stream/stream.hw " + + "1000000;", + "m5 dumpresetstats 0;", + "numactl --cpubind=0 --membind=1 -- " + + "/home/ubuntu/simple-vectorizable-microbenchmarks/stream/stream.hw " + + "1000000;", + "m5 dumpresetstats 0;", + "m5 exit;", +] +board.set_kernel_disk_workload( + kernel=CustomResource("/home/kaustavg/vmlinux-5.4.49-NUMA.arm64"), + bootloader=CustomResource( + "/home/kaustavg/.cache/gem5/arm64-bootloader-foundation" + ), + disk_image=DiskImageResource( + "/projects/gem5/hn/DISK_IMAGES/arm64sve-hpc-2204-20230526-numa.img", + root_partition="1", + ), + readfile_contents=" ".join(cmd), +) +# This script will boot two numa nodes in a full system simulation where the +# gem5 node will be sending instructions to the SST node. The simulation ends +# after displaying numastat information on the terminal, which can be viewed +# from board.terminal. +simulator = Simulator(board=board) +simulator.run() diff --git a/disaggregated_memory/configs/arm-sst-numa-nodes.py b/disaggregated_memory/configs/arm-sst-numa-nodes.py new file mode 100644 index 0000000000..b35498e15c --- /dev/null +++ b/disaggregated_memory/configs/arm-sst-numa-nodes.py @@ -0,0 +1,130 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved.
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +This script shows an example of running a full system ARM Ubuntu boot +simulation using the gem5 library. This simulation boots Ubuntu 20.04 using +1 TIMING CPU cores and executes `STREAM`. The simulation ends when the +startup is completed successfully. + +* This script has to be executed from SST +""" + +import os +import sys + +# all the source files are one directory above. 
+sys.path.append( + os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir)) +) + +import m5 +from m5.objects import Root, AddrRange + +from boards.arm_sst_board import ArmSstDMBoard +from cachehierarchies.dm_caches_sst import ClassicPrivateL1PrivateL2SstDMCache +from memories.external_remote_memory import ExternalRemoteMemoryInterface +from gem5.utils.requires import requires +from gem5.components.memory import DualChannelDDR4_2400 +from gem5.components.processors.simple_processor import SimpleProcessor +from gem5.components.processors.cpu_types import CPUTypes +from gem5.isas import ISA +from gem5.simulate.simulator import Simulator +from gem5.resources.workload import Workload +from gem5.resources.workload import * +from gem5.resources.resource import * + +# This runs a check to ensure the gem5 binary is compiled for ARM. +requires(isa_required=ISA.ARM) +# Here we setup the parameters of the l1 and l2 caches. +cache_hierarchy = ClassicPrivateL1PrivateL2SstDMCache( + l1d_size="32KiB", l1i_size="32KiB", l2_size="1MB" +) +# Memory: Dual Channel DDR4 2400 DRAM device. +local_memory = DualChannelDDR4_2400(size="2GiB") +# Either supply the size of the remote memory or the address range of the +# remote memory. Since this is inside the external memory, it does not matter +# what type of memory is being simulated. This can either be initialized with +# a size or a memory address range, which is more flexible. Adding remote +# memory latency automatically adds a non-coherent crossbar to simulate latency +remote_memory = ExternalRemoteMemoryInterface( + addr_range=AddrRange(0x100000000, size="2GiB"), remote_memory_latency=750 +) +# Here we setup the processor. We use a simple processor. +processor = SimpleProcessor( + cpu_type=CPUTypes.TIMING, isa=ISA.ARM, num_cores=1 +) +# Here we setup the board which allows us to do Full-System ARM simulations.
+board = ArmSstDMBoard( + clk_freq="3GHz", + processor=processor, + local_memory=local_memory, + remote_memory=remote_memory, + cache_hierarchy=cache_hierarchy, +) + +cmd = [ + "mount -t sysfs - /sys;", + "mount -t proc - /proc;", + "numastat;", + "m5 dumpresetstats 0 ;", + "numactl --cpubind=0 --membind=0 -- " + + "/home/ubuntu/simple-vectorizable-microbenchmarks/stream/stream.hw " + + "1000000;", + "m5 dumpresetstats 0;", + "numactl --cpubind=0 --membind=0,1 -- " + + "/home/ubuntu/simple-vectorizable-microbenchmarks/stream/stream.hw " + + "1000000;", + "m5 dumpresetstats 0;", + "numactl --cpubind=0 --membind=1 -- " + + "/home/ubuntu/simple-vectorizable-microbenchmarks/stream/stream.hw " + + "1000000;", + "m5 dumpresetstats 0;", + "m5 exit;", +] + +workload = CustomWorkload( + function="set_kernel_disk_workload", + parameters={ + "kernel" : CustomResource("/home/kaustavg/vmlinux-5.4.49-NUMA.arm64"), + "bootloader" : CustomResource("/home/kaustavg/.cache/gem5/arm64-bootloader-foundation"), + "disk_image" : DiskImageResource( + local_path="/projects/gem5/hn/DISK_IMAGES/arm64sve-hpc-2204-20230526-numa.img", + root_partition="1", + ), + "readfile_contents" : " ".join(cmd) + }, +) +# This disk image needs to have NUMA tools installed. +board.set_workload(workload) +# This script will boot two numa nodes in a full system simulation where the +# gem5 node will be sending instructions to the SST node. The simulation ends +# after displaying numastat information on the terminal, which can be viewed +# from board.terminal.
+board._pre_instantiate() +root = Root(full_system=True, board=board) +board._post_instantiate() +m5.instantiate() diff --git a/disaggregated_memory_setup/numa_config_w_delay.py b/disaggregated_memory/configs/riscv-gem5-numa-nodes.py similarity index 61% rename from disaggregated_memory_setup/numa_config_w_delay.py rename to disaggregated_memory/configs/riscv-gem5-numa-nodes.py index 1af6392f96..e011298803 100644 --- a/disaggregated_memory_setup/numa_config_w_delay.py +++ b/disaggregated_memory/configs/riscv-gem5-numa-nodes.py @@ -27,27 +27,27 @@ """ This script shows an example of running a full system RISCV Ubuntu boot simulation using the gem5 library. This simulation boots Ubuntu 20.04 using -2 TIMING CPU cores. The simulation ends when the startup is completed -successfully. +1 TIMING CPU cores and executes `numastat`. The simulation ends when the +startup is completed successfully. +""" -Usage ------ +import os +import sys -``` -scons build/RISCV/gem5.opt -./build/RISCV/gem5.opt \ - configs/example/gem5_library/riscv-ubuntu-run.py -``` -""" +# all the source files are one directory above. 
+sys.path.append( + os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir)) +) import m5 from m5.objects import Root +from boards.riscv_gem5_board import RiscvGem5DMBoard +from cachehierarchies.dm_caches import ClassicPL1PL2DMCache from gem5.utils.requires import requires -from riscv_dm_board import RiscvDMBoard -from dm_caches import ClassicPL1PL2DMCache from gem5.components.memory import DualChannelDDR4_2400 -from gem5.components.memory.multi_channel import * +from memories.remote_memory import RemoteChanneledMemory +from gem5.components.memory.dram_interfaces.ddr4 import DDR4_2400_8x8 from gem5.components.processors.simple_processor import SimpleProcessor from gem5.components.processors.cpu_types import CPUTypes from gem5.isas import ISA @@ -56,9 +56,9 @@ from gem5.resources.workload import * from gem5.resources.resource import * -from gem5.components.memory.abstract_memory_system import AbstractMemorySystem -from gem5.components.memory.dram_interfaces.ddr4 import DDR4_2400_8x8 -from remote_memory import RemoteChanneledMemory +# This runs a check to ensure the gem5 binary is compiled for RISCV. + +requires(isa_required=ISA.RISCV) # defining a new type of memory with latency added. def RemoteDualChannelDDR4_2400( @@ -74,63 +74,57 @@ def RemoteDualChannelDDR4_2400( size=size, remote_offset_latency=remote_offset_latency, ) - - -# This runs a check to ensure the gem5 binary is compiled for RISCV. - -requires(isa_required=ISA.RISCV) - -# With RISCV, we use simple caches. - -cache_hierarchy = ClassicPL1PL2DMCache( - l1d_size="16kB", l1i_size="16kB", l2_size="256kB" +# Here we setup the parameters of the l1 and l2 caches. +cache_hierarchy = ClassicPrivateL1PrivateL2SstDMCache( + l1d_size="32KiB", l1i_size="32KiB", l2_size="1MB" ) - -# Local memory: Dual Channel DDR4 2400 DRAM device with no delay. -# Remote memory: Dual Channel DDR4 2400 DRAM device with 750 clocks (250 ns). -# 250 ns is taken from the TPP paper. 
- -local_memory = DualChannelDDR4_2400(size="512MB") +# Memory: Dual Channel DDR4 2400 DRAM device. +local_memory = DualChannelDDR4_2400(size="2GiB") +# The remote memory can either be a simple Memory Interface, which is from a +# different memory range or it can be a Remote Memory Range, which has an +# inherent delay while performing reads and writes into that memory. For simple +# memory, use any MemInterfaces available in gem5 standard library. For remote +# memory, please refer to the `RemoteDualChannelDDR4_2400` method in this +# config script to extend any existing MemInterface class and add latency value +# to that memory. remote_memory = RemoteDualChannelDDR4_2400( - size="2GB", remote_offset_latency=750 + size="1GB", remote_offset_latency=750 ) - # Here we setup the processor. We use a simple processor. processor = SimpleProcessor( - cpu_type=CPUTypes.TIMING, isa=ISA.RISCV, num_cores=2 + cpu_type=CPUTypes.ATOMIC, isa=ISA.RISCV, num_cores=1 ) - -# Here we setup the board. The RiscvBoard allows for Full-System RISCV -# simulations. -board = RiscvDMBoard( +# Here we setup the board which allows us to do Full-System RISCV simulations.
+board = RiscvGem5DMBoard( clk_freq="3GHz", processor=processor, local_memory=local_memory, remote_memory=remote_memory, cache_hierarchy=cache_hierarchy, ) - +cmd = [ + "mount -t sysfs - /sys;", + "mount -t proc - /proc;", + "numastat;" + "m5 exit;" +] workload = CustomWorkload( function="set_kernel_disk_workload", parameters={ - "disk_image": CustomDiskImageResource( - local_path=os.path.join( - os.getcwd(), "/home/kaustavg/disk-images/rv64gc-hpc-2204.img" - ), - disk_root_partition="1", + "disk_image": DiskImageResource( + local_path="/home/kaustavg/disk-images/rv64gc-hpc-2204.img", + root_partition="1", ), "kernel": CustomResource( - os.path.join(os.getcwd(), "/home/kaustavg/bbl") + "/scr/kaustavg/simulators-at-scratch/DArchR/WorkingDir/SST13/kernels/gem5-resources/src/riscv-boot-exit-nodisk/riscv-pk/build/bbl" ), + "readfile_contents": " ".join(cmd), }, ) - -# Here we a full system workload: "riscv-ubuntu-20.04-boot" which boots -# Ubuntu 20.04. Once the system successfully boots it encounters an `m5_exit` -# instruction which stops the simulation. When the simulation has ended you may -# inspect `m5out/system.pc.com_1.device` to see the stdout. -# board.set_workload(Workload("riscv-ubuntu-20.04-boot")) +# This script will boot two numa nodes in a full system simulation where the +# gem5 node will be sending instructions to the SST node. The simulation ends +# after displaying numastat information on the terminal, which can be viewed +# from board.terminal.
board.set_workload(workload) simulator = Simulator(board=board) simulator.run() -simulator.run() diff --git a/disaggregated_memory_setup/numa_config.py b/disaggregated_memory/configs/riscv-sst-numa-nodes.py similarity index 63% rename from disaggregated_memory_setup/numa_config.py rename to disaggregated_memory/configs/riscv-sst-numa-nodes.py index 2436a7c735..0e07e13237 100644 --- a/disaggregated_memory_setup/numa_config.py +++ b/disaggregated_memory/configs/riscv-sst-numa-nodes.py @@ -27,27 +27,29 @@ """ This script shows an example of running a full system RISCV Ubuntu boot simulation using the gem5 library. This simulation boots Ubuntu 20.04 using -2 TIMING CPU cores. The simulation ends when the startup is completed -successfully. +1 TIMING CPU cores and executes `numastat`. The simulation ends when the +startup is completed successfully. -Usage ------ - -``` -scons build/RISCV/gem5.opt -./build/RISCV/gem5.opt \ - configs/example/gem5_library/riscv-ubuntu-run.py -``` +* This script has to be executed from SST """ +import os +import sys + +# all the source files are one directory above. 
+sys.path.append( + os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir)) +) + import m5 -from m5.objects import Root +from m5.objects import Root, AddrRange + +from boards.riscv_sst_board import RiscvSstDMBoard +from cachehierarchies.dm_caches_sst import ClassicPrivateL1PrivateL2SstDMCache +from memories.external_remote_memory import ExternalRemoteMemoryInterface from gem5.utils.requires import requires -from riscv_dm_board import RiscvDMBoard -from dm_caches import ClassicPL1PL2DMCache from gem5.components.memory import DualChannelDDR4_2400 -from gem5.components.memory.multi_channel import * from gem5.components.processors.simple_processor import SimpleProcessor from gem5.components.processors.cpu_types import CPUTypes from gem5.isas import ISA @@ -57,30 +59,27 @@ from gem5.resources.resource import * # This runs a check to ensure the gem5 binary is compiled for RISCV. - requires(isa_required=ISA.RISCV) - # Here we setup the parameters of the l1 and l2 caches. - -cache_hierarchy = ClassicPL1PL2DMCache( - l1d_size="2MB", l1i_size="2MB", l2_size="4MB" +cache_hierarchy = ClassicPrivateL1PrivateL2SstDMCache( + l1d_size="32KiB", l1i_size="32KiB", l2_size="1MB" ) - # Memory: Dual Channel DDR4 2400 DRAM device. - -local_memory = DualChannelDDR4_2400(size="64MiB") -remote_memory = DualChannelDDR4_2400(size="64MiB") - -# remote_memory = DualChannelHBM_1000(size="4GB") - +local_memory = DualChannelDDR4_2400(size="2GiB") +# Either supply the size of the remote memory or the address range of the +# remote memory. Since this is inside the external memory, it does not matter +# what type of memory is being simulated. This can either be initialized with +# a size or a memory address range, which is more flexible.
Adding remote +# memory latency automatically adds a non-coherent crossbar to simulate latency +remote_memory = ExternalRemoteMemoryInterface( + addr_range=AddrRange(0x100000000, size="2GiB"), remote_memory_latency=750 +) # Here we setup the processor. We use a simple processor. processor = SimpleProcessor( - cpu_type=CPUTypes.ATOMIC, isa=ISA.RISCV, num_cores=1 + cpu_type=CPUTypes.TIMING, isa=ISA.RISCV, num_cores=1 ) - -# Here we setup the board. The RiscvBoard allows for Full-System RISCV -# simulations. -board = RiscvDMBoard( +# Here we setup the board which allows us to do Full-System RISCV simulations. +board = RiscvSstDMBoard( clk_freq="3GHz", processor=processor, local_memory=local_memory, @@ -91,36 +90,30 @@ cmd = [ "mount -t sysfs - /sys;", "mount -t proc - /proc;", - "bin/bash;" - # "m5 exit;" + "numastat;", + "m5 exit;" ] workload = CustomWorkload( function="set_kernel_disk_workload", parameters={ "disk_image": DiskImageResource( - # local_path=os.path.join( - # os.getcwd(), "/home/kaustavg/ubuntu-numa.img" - # ), local_path="/home/kaustavg/disk-images/rv64gc-hpc-2204.img", root_partition="1", ), "kernel": CustomResource( "/scr/kaustavg/simulators-at-scratch/DArchR/WorkingDir/SST13/kernels/gem5-resources/src/riscv-boot-exit-nodisk/riscv-pk/build/bbl" - # os.path.join(os.getcwd(), "/home/kaustavg/bbl") ), "readfile_contents": " ".join(cmd), }, ) -print("______", " ".join(cmd)) -# Here we a full system workload: "riscv-ubuntu-20.04-boot" which boots -# Ubuntu 20.04. Once the system successfully boots it encounters an `m5_exit` -# instruction which stops the simulation. When the simulation has ended you may -# inspect `m5out/system.pc.com_1.device` to see the stdout. -# board.set_workload(Workload("riscv-ubuntu-20.04-boot")) - -# This disk image has NUMA tools installed. +# This disk image needs to have NUMA tools installed.
board.set_workload(workload) -simulator = Simulator(board=board) -simulator.run() -simulator.run() +# This script will boot two numa nodes in a full system simulation where the +# gem5 node will be sending instructions to the SST node. The simulation ends +# after displaying numastat information on the terminal, which can be viewed +# from board.terminal. +board._pre_instantiate() +root = Root(full_system=True, board=board) +board._post_instantiate() +m5.instantiate() diff --git a/disaggregated_memory/memories/external_remote_memory.py b/disaggregated_memory/memories/external_remote_memory.py new file mode 100644 index 0000000000..6211f1679a --- /dev/null +++ b/disaggregated_memory/memories/external_remote_memory.py @@ -0,0 +1,86 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""We need a class that extends the outgoing bridge from gem5. The goal +of this class to have a MemInterface like class in the future, where we'll +append mem_ranges within this interface.""" + +import m5 +from m5.util import fatal +from m5.objects.XBar import NoncoherentXBar +from m5.objects import OutgoingRequestBridge, AddrRange, Tick + + +class ExternalRemoteMemoryInterface: + def __init__( + self, + size: "str" = None, + addr_range: AddrRange = None, + remote_memory_latency: Tick = None, + ): + # We will create a non-coherent cross bar if the user wants to simulate + # latency for the remote memory links. + self._xbar_required = False + # We setup the remote memory with size or address range. This allows us + # to quickly scale the setup with N nodes. + self._size = None + self._set_using_addr_ranges = False + self.remote_memory = OutgoingRequestBridge() + # The user needs to provide either the size of the remote memory or the + # range of the remote memory. + if size is None and addr_range is None: + fatal("External memory needs to either have a size or a range!") + else: + if addr_range is not None: + self.remote_memory.physical_address_ranges = [addr_range] + self._size = self.remote_memory.physical_address_ranges[ + 0 + ].size() + self._set_using_addr_ranges = True + # The size will be setup in the board in case ranges are not given + # by the user. 
+ else: + self._size = size + + # If there is a remote latency specified, create a non_coherent + # cross_bar. + if remote_memory_latency is not None: + self._xbar_required = True + self._remote_memory_latency = remote_memory_latency + + def get_size(self): + return self._size + + # def set_size(self): + # self._size = self.remote_memory.physical_addr_ranges[0].size() + + def is_xbar_required(self): + # If an XBar is required, it should be added in the connect_things to + # avoid initializing an orphan node. + return self._xbar_required + + def get_set_using_addr_ranges(self): + return self._set_using_addr_ranges diff --git a/disaggregated_memory_setup/remote_memory.py b/disaggregated_memory/memories/remote_memory.py similarity index 99% rename from disaggregated_memory_setup/remote_memory.py rename to disaggregated_memory/memories/remote_memory.py index cd3e1afc25..367d29830c 100644 --- a/disaggregated_memory_setup/remote_memory.py +++ b/disaggregated_memory/memories/remote_memory.py @@ -66,7 +66,7 @@ def _create_mem_interfaces_controller(self): frontend_latency=self._remote_latency, forward_latency=0, response_latency=0, - width=8, + width=64, ) for _ in range(self._num_channels) ] diff --git a/disaggregated_memory_setup/arm_sst_board.py b/disaggregated_memory_setup/arm_sst_board.py deleted file mode 100644 index ae5385530b..0000000000 --- a/disaggregated_memory_setup/arm_sst_board.py +++ /dev/null @@ -1,220 +0,0 @@ -# Copyright (c) 2023 The Regents of the University of California -# All rights reserved. 
-# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer; -# redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution; -# neither the name of the copyright holders nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -from m5.objects import ( - AddrRange, - VoltageDomain, - SrcClockDomain, - Terminal, - VncServer, - IOXBar, - BadAddr, - ArmSystem, -) - -from m5.objects.RealView import VExpress_GEM5_Base, VExpress_GEM5_Foundation -from m5.objects.ArmSystem import ArmRelease, ArmDefaultRelease -from m5.objects.ArmFsWorkload import ArmFsLinux - -from m5.util.fdthelper import ( - Fdt, - FdtNode, - FdtProperty, - FdtPropertyStrings, - FdtPropertyWords, - FdtState, -) - -import os -import m5 -from abc import ABCMeta -from gem5.components.boards.arm_board import ArmBoard -from gem5.components.processors.abstract_processor import AbstractProcessor -from gem5.components.memory.abstract_memory_system import AbstractMemorySystem -from gem5.components.cachehierarchies.abstract_cache_hierarchy import ( - AbstractCacheHierarchy, -) -from gem5.utils.override import overrides - -from typing import List, Sequence, Tuple - - -class ArmDMSSTBoard(ArmBoard): - __metaclass__ = ABCMeta - - def __init__( - self, - clk_freq: str, - processor: AbstractProcessor, - memory: AbstractMemorySystem, - cache_hierarchy: AbstractCacheHierarchy, - remote_memory_range: AddrRange, - platform: VExpress_GEM5_Base = VExpress_GEM5_Foundation(), - release: ArmRelease = ArmDefaultRelease(), - ) -> None: - - self._remote_memory_range = remote_memory_range - super().__init__( - clk_freq=clk_freq, - processor=processor, - memory=memory, - cache_hierarchy=cache_hierarchy, - platform=platform, - release=release, - ) - - def get_remote_memory_addr_range(self): - return self._remote_memory_range - - @overrides(ArmBoard) - def _setup_board(self) -> None: - - # This board is expected to run full-system simulation. - # Loading ArmFsLinux() from `src/arch/arm/ArmFsWorkload.py` - self.workload = ArmFsLinux() - - # We are fixing the following variable for the ArmSystem to work. The - # security extension is checked while generating the dtb file in - # realview. This board does not have security extension enabled. 
- self._have_psci = False - - # highest_el_is_64 is set to True. True if the register width of the - # highest implemented exception level is 64 bits. - self.highest_el_is_64 = True - - # Setting up the voltage and the clock domain here for the ARM board. - # The ArmSystem/RealView expects voltage_domain to be a parameter. - # The voltage and the clock frequency are taken from the devices.py - # file from configs/example/arm. We set the clock to the same frequency - # as the user specified in the config script. - self.voltage_domain = VoltageDomain(voltage="1.0V") - self.clk_domain = SrcClockDomain( - clock=self._clk_freq, voltage_domain=self.voltage_domain - ) - - # The ARM board supports both Terminal and VncServer. - self.terminal = Terminal() - self.vncserver = VncServer() - - # Incoherent I/O Bus - self.iobus = IOXBar() - self.iobus.badaddr_responder = BadAddr() - self.iobus.default = self.iobus.badaddr_responder.pio - - # We now need to setup the dma_ports. - self._dma_ports = None - - # RealView sets up most of the on-chip and off-chip devices and GIC - # for the ARM board. These devices' information is also used to - # generate the dtb file. We then connect the I/O devices to the - # I/O bus. - self._setup_io_devices() - - # Once the realview is setup, we can continue setting up the memory - # ranges. ArmBoard's memory can only be setup once realview is - # initialized. - memory = self.get_memory() - mem_size = memory.get_size() - - # The following code is taken from configs/example/arm/devices.py. It - # sets up all the memory ranges for the board. 
- self.mem_ranges = [] - success = False - # self.mem_ranges.append(self.get_remote_memory_addr_range()) - for mem_range in self.realview._mem_regions: - size_in_range = min(mem_size, mem_range.size()) - self.mem_ranges.append( - AddrRange(start=mem_range.start, size=size_in_range) - ) - - mem_size -= size_in_range - if mem_size == 0: - success = True - break - - if success: - memory.set_memory_range(self.mem_ranges) - else: - raise ValueError("Memory size too big for platform capabilities") - - self.mem_ranges.append(self.get_remote_memory_addr_range()) - - # The PCI Devices. PCI devices can be added via the `_add_pci_device` - # function. - self._pci_devices = [] - - @overrides(ArmSystem) - def generateDeviceTree(self, state): - # Generate a device tree root node for the system by creating the root - # node and adding the generated subnodes of all children. - # When a child needs to add multiple nodes, this is done by also - # creating a node called '/' which will then be merged with the - # root instead of appended. 
- - def generateMemNode(numa_node_id, mem_range): - node = FdtNode(f"memory@{int(mem_range.start):x}") - node.append(FdtPropertyStrings("device_type", ["memory"])) - node.append( - FdtPropertyWords( - "reg", - state.addrCells(mem_range.start) - + state.sizeCells(mem_range.size()), - ) - ) - node.append(FdtPropertyWords("numa-node-id", [numa_node_id])) - return node - - root = FdtNode("/") - root.append(state.addrCellsProperty()) - root.append(state.sizeCellsProperty()) - - # Add memory nodes - for mem_range in self.mem_ranges: - root.append(generateMemNode(0, mem_range)) - root.append(generateMemNode(1, self.get_remote_memory_addr_range())) - - for node in self.recurseDeviceTree(state): - # Merge root nodes instead of adding them (for children - # that need to add multiple root level nodes) - if node.get_name() == root.get_name(): - root.merge(node) - else: - root.append(node) - - return root - - @overrides(ArmBoard) - def get_default_kernel_args(self) -> List[str]: - - # The default kernel string is taken from the devices.py file. - return [ - "console=ttyAMA0", - "lpj=19988480", - "norandmaps", - "root={root_value}", - "rw", - # f"mem={self.get_memory().get_size()}", - ] diff --git a/disaggregated_memory_setup/numa_config_sst.py b/disaggregated_memory_setup/numa_config_sst.py deleted file mode 100644 index 42ce794ae6..0000000000 --- a/disaggregated_memory_setup/numa_config_sst.py +++ /dev/null @@ -1,131 +0,0 @@ -# Copyright (c) 2023 The Regents of the University of California -# All rights reserved. 
-# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer; -# redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution; -# neither the name of the copyright holders nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -""" -This script shows an example of running a full system RISCV Ubuntu boot -simulation using the gem5 library. This simulation boots Ubuntu 20.04 using -2 TIMING CPU cores. The simulation ends when the startup is completed -successfully. 
- -Usage ------ - -``` -scons build/RISCV/gem5.opt -./build/RISCV/gem5.opt \ - configs/example/gem5_library/riscv-ubuntu-run.py -``` -""" - -import m5 -from m5.objects import Root, NoncoherentXBar - -from gem5.utils.requires import requires -from riscv_dm_board import RiscvDMBoard -from riscv_sst_board import RiscvSstBoard -from dm_caches import ClassicPL1PL2DMCache -from gem5.components.memory import SingleChannelDDR4_2400, DualChannelDDR4_2400 -from gem5.components.memory.multi_channel import * -from gem5.components.processors.simple_processor import SimpleProcessor -from gem5.components.processors.cpu_types import CPUTypes -from gem5.isas import ISA -from gem5.simulate.simulator import Simulator -from gem5.resources.workload import Workload -from gem5.resources.workload import * -from gem5.resources.resource import * - -# This runs a check to ensure the gem5 binary is compiled for RISCV. - -requires(isa_required=ISA.RISCV) - -# Here we setup the parameters of the l1 and l2 caches. - -cache_hierarchy = ClassicPL1PL2DMCache( - l1d_size="4MB", l1i_size="4MB", l2_size="32MB" -) - -# Memory: Dual Channel DDR4 2400 DRAM device. - -local_memory = SingleChannelDDR4_2400(size="2GiB") -# This has to be an argument coming from SST's side. -remote_memory_size = "2GiB" -# remote_memory = DualChannelDDR4_2400(size="4GB") - -# remote_memory = DualChannelHBM_1000(size="4GB") - -# Here we setup the processor. We use a simple processor. -processor = SimpleProcessor( - cpu_type=CPUTypes.TIMING, isa=ISA.RISCV, num_cores=1 -) - -# Here we setup the board. The RiscvBoard allows for Full-System RISCV -# simulations. 
-board = RiscvSstBoard( - clk_freq="3GHz", - processor=processor, - local_memory=local_memory, - remote_memory_size=remote_memory_size, - cache_hierarchy=cache_hierarchy, -) - -workload = CustomWorkload( - function="set_kernel_disk_workload", - parameters={ - "disk_image": DiskImageResource( - # CustomDiskImageResource( - local_path=os.path.join( - os.getcwd(), "/home/kaustavg/ubuntu-numa.img" - ), - root_partition="1", - ), - "kernel": CustomResource( - os.path.join(os.getcwd(), "/home/kaustavg/bbl") - ), - }, -) - -# board.cache_hierarchy.membus = NoncoherentXBar( -# frontend_latency=0, -# forward_latency=0, -# response_latency=0, -# header_latency=0, -# width=64, -# ) - -# Here we a full system workload: "riscv-ubuntu-20.04-boot" which boots -# Ubuntu 20.04. Once the system successfully boots it encounters an `m5_exit` -# instruction which stops the simulation. When the simulation has ended you may -# inspect `m5out/system.pc.com_1.device` to see the stdout. -# board.set_workload(Workload("riscv-ubuntu-20.04-boot")) - -# This disk image has NUMA tools installed. -board.set_workload(workload) -board._pre_instantiate() -root = Root(full_system=True, system=board) - -# simulator = Simulator(board=board) -# simulator.run() -# simulator.run() diff --git a/disaggregated_memory_setup/numa_config_sst_nodes.py b/disaggregated_memory_setup/numa_config_sst_nodes.py deleted file mode 100644 index f3d6ef83cc..0000000000 --- a/disaggregated_memory_setup/numa_config_sst_nodes.py +++ /dev/null @@ -1,172 +0,0 @@ -# Copyright (c) 2023 The Regents of the University of California -# All rights reserved. 
-# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer; -# redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution; -# neither the name of the copyright holders nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -""" -This script shows an example of running a full system RISCV Ubuntu boot -simulation using the gem5 library. This simulation boots Ubuntu 20.04 using -2 TIMING CPU cores. The simulation ends when the startup is completed -successfully. 
- -Usage ------ - -``` -scons build/RISCV/gem5.opt -./build/RISCV/gem5.opt \ - configs/example/gem5_library/riscv-ubuntu-run.py -``` -""" - -import m5 -import argparse -from m5.objects import Root, NoncoherentXBar - -from gem5.utils.requires import requires -from riscv_dm_board import RiscvDMBoard -from riscv_sst_board import RiscvSstBoard -from dm_caches import ClassicPL1PL2DMCache -from gem5.components.memory import SingleChannelDDR4_2400, DualChannelDDR4_2400 -from gem5.components.memory.multi_channel import * -from gem5.components.processors.simple_processor import SimpleProcessor -from gem5.components.processors.cpu_types import CPUTypes -from gem5.isas import ISA -from gem5.simulate.simulator import Simulator -from gem5.resources.workload import Workload -from gem5.resources.workload import * -from gem5.resources.resource import * - -# This runs a check to ensure the gem5 binary is compiled for RISCV. - -requires(isa_required=ISA.RISCV) - -parser = argparse.ArgumentParser( - description="An example configuration script to run multiple gem5 nodes." -) - -parser.add_argument( - "--cpu-clock-rate", - type=str, - required=True, - help="CPU clock rate. e.g. 3GHz etc", -) - -parser.add_argument( - "--instance", - type=int, - required=True, - help="Gem5 node instance", -) -args = parser.parse_args() -# Here we setup the parameters of the l1 and l2 caches. - -cache_hierarchy = ClassicPL1PL2DMCache( - l1d_size="128KiB", l1i_size="128KiB", l2_size="256KiB" -) - -# Memory: Dual Channel DDR4 2400 DRAM device. -# local_memory_size = str(2 * (int(args.instance) + 1)) + "GiB" -local_memory_size = "2GiB" -local_memory = SingleChannelDDR4_2400(size=local_memory_size) -# This has to be an argument coming from SST's side. -remote_memory_size = "2GiB" -print(local_memory_size) - -# remote_memory = DualChannelDDR4_2400(size="4GB") - -# remote_memory = DualChannelHBM_1000(size="4GB") - -# Here we setup the processor. We use a simple processor. 
-processor = SimpleProcessor(cpu_type=CPUTypes.O3, isa=ISA.RISCV, num_cores=1) -# processor = SimpleSwitchableProcessor( -# first_cpu_type=CPUTypes.O3, -# isa=ISA.RISCV, num_cores=1 -# ) -# Here we setup the board. The RiscvBoard allows for Full-System RISCV -# simulations. -board = RiscvSstBoard( - clk_freq=args.cpu_clock_rate, - processor=processor, - local_memory=local_memory, - remote_memory_size=remote_memory_size, - cache_hierarchy=cache_hierarchy, - instance=args.instance, -) - -cmd = [ - "mount -t sysfs - /sys;", - "mount -t proc - /proc;", - "numastat;", - "m5 dumpresetstats 0 ;", - "numactl --cpubind=0 --membind=0 -- /home/ubuntu/simple-vectorizable-microbenchmarks/stream-annotated/stream.hw 1000000;", - "m5 dumpresetstats 0;", - "numactl --cpubind=0 --membind=0,1 -- /home/ubuntu/simple-vectorizable-microbenchmarks/stream-annotated/stream.hw 1000000;", - "m5 dumpresetstats 0;", - "numactl --cpubind=0 --membind=1 -- /home/ubuntu/simple-vectorizable-microbenchmarks/stream-annotated/stream.hw 1000000;", - "m5 dumpresetstats 0;", - "m5 exit;", -] - -workload = CustomWorkload( - function="set_kernel_disk_workload", - parameters={ - "disk_image": DiskImageResource( - # CustomDiskImageResource( - # local_path=os.path.join( - # os.getcwd(), "/home/kaustavg/ubuntu-numa.img" - # "/home/kaustavg/disk-images/rv64gc-hpc-2204.img" - # ), - local_path="/home/kaustavg/disk-images/rv64gc-hpc-2204.img", - root_partition="1", - ), - "kernel": CustomResource( - os.path.join(os.getcwd(), "/home/kaustavg/bbl") - ), - "readfile_contents": " ".join(cmd), - }, -) - -# board.cache_hierarchy.membus = NoncoherentXBar( -# frontend_latency=0, -# forward_latency=0, -# response_latency=0, -# header_latency=0, -# width=64, -# ) - -# Here we a full system workload: "riscv-ubuntu-20.04-boot" which boots -# Ubuntu 20.04. Once the system successfully boots it encounters an `m5_exit` -# instruction which stops the simulation. 
When the simulation has ended you may -# inspect `m5out/system.pc.com_1.device` to see the stdout. -# board.set_workload(Workload("riscv-ubuntu-20.04-boot")) - -# This disk image has NUMA tools installed. -board.set_workload(workload) -board._pre_instantiate() -root = Root(full_system=True, system=board) -m5.instantiate() -# simulator = Simulator(board=board) -# simulator.run() -# simulator.run() diff --git a/disaggregated_memory_setup/numa_config_sst_nodes_arm.py b/disaggregated_memory_setup/numa_config_sst_nodes_arm.py deleted file mode 100644 index 8a255e6c3f..0000000000 --- a/disaggregated_memory_setup/numa_config_sst_nodes_arm.py +++ /dev/null @@ -1,139 +0,0 @@ -# Copyright (c) 2023 The Regents of the University of California -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer; -# redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution; -# neither the name of the copyright holders nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import m5 -from m5.objects import Root, OutgoingRequestBridge, AddrRange - -from gem5.utils.requires import requires -from arm_dm_sst_board import ArmDMSSTBoard -from dm_caches_arm import ClassicPL1PL2DMCacheArm -from gem5.components.memory import DualChannelDDR4_2400 -from gem5.components.memory.multi_channel import * -from gem5.components.processors.simple_processor import SimpleProcessor -from gem5.components.processors.cpu_types import CPUTypes -from gem5.isas import ISA -from gem5.simulate.simulator import Simulator -from gem5.resources.workload import Workload -from gem5.resources.workload import * -from gem5.resources.resource import * -from gem5.utils.override import overrides -import argparse - -parser = argparse.ArgumentParser() -parser.add_argument("--command", type=str, help="Command run by guest") -parser.add_argument( - "--cpu-type", - type=str, - choices=["atomic", "timing"], - default="atomic", - help="CPU type", -) -parser.add_argument( - "--cpu-clock-rate", - type=str, - required=True, - help="CPU Clock", -) -parser.add_argument( - "--remote-memory-range", - type=str, - # default="2GiB", - required=True, - help="Remote memory range", -) -parser.add_argument( - "--local-memory-range", - type=str, - # default="2GiB", - required=True, - help="Local memory range", -) -args = parser.parse_args() -command = args.command -remote_memory_range = list(map(int, args.remote_memory_range.split(","))) -remote_memory_range = 
AddrRange(remote_memory_range[0], remote_memory_range[1]) -print(remote_memory_range) -requires(isa_required=ISA.ARM) - -cache_hierarchy = ClassicPL1PL2DMCache( - l1d_size="128KiB", l1i_size="128KiB", l2_size="256KiB" -) - -local_memory = DualChannelDDR4_2400(size=args.local_memory_range) - -cpu_type = {"atomic": CPUTypes.ATOMIC, "timing": CPUTypes.TIMING}[ - args.cpu_type -] -processor = SimpleProcessor(cpu_type=cpu_type, isa=ISA.ARM, num_cores=1) - - -class MyBoard(ArmDMSSTBoard): - @overrides(ArmDMSSTBoard) - def _pre_instantiate(self): - super()._pre_instantiate() - self.remote_memory_outgoing_bridge = OutgoingRequestBridge() - self.remote_memory_outgoing_bridge.physical_address_ranges = [ - self.get_remote_memory_addr_range() - ] - self.remote_memory_outgoing_bridge.port = ( - self.cache_hierarchy.membus.mem_side_ports - ) - - @overrides(ArmDMSSTBoard) - def get_default_kernel_args(self): - return [ - "root=/dev/vda1", - "init=/root/gem5-init.sh", - "console=ttyAMA0", - "lpj=19988480", - "norandmaps", - "rw", - # f"mem={self.get_memory().get_size()}", - ] - - -board = MyBoard( - clk_freq=args.cpu_clock_rate, - processor=processor, - memory=local_memory, - remote_memory_range=remote_memory_range, - cache_hierarchy=cache_hierarchy, -) - -board.set_kernel_disk_workload( - kernel=CustomResource("/home/kaustavg/vmlinux-5.4.49-NUMA.arm64"), - bootloader=Resource("arm64-bootloader-foundation"), - disk_image=DiskImageResource( - "/projects/gem5/hn/DISK_IMAGES/arm64sve-hpc-2204-20230526-numa.img" - ), - readfile_contents=f"mount -t sysfs - /sys; mount -t proc - /proc; {command};", -) - -board._pre_instantiate() -root = Root(full_system=True, system=board) - -# simulator = Simulator(board=board) -# simulator._instantiate() diff --git a/disaggregated_memory_setup/numa_config_w_ruby_caches.py b/disaggregated_memory_setup/numa_config_w_ruby_caches.py deleted file mode 100644 index e3026d0951..0000000000 --- a/disaggregated_memory_setup/numa_config_w_ruby_caches.py +++ 
/dev/null @@ -1,160 +0,0 @@ -# Copyright (c) 2023 The Regents of the University of California -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer; -# redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution; -# neither the name of the copyright holders nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -""" -This script shows an example of running a full system RISCV Ubuntu boot -simulation using the gem5 library. This simulation boots Ubuntu 20.04 using -2 TIMING CPU cores. The simulation ends when the startup is completed -successfully. 
- -Usage ------ - -``` -scons build/RISCV/gem5.opt -./build/RISCV/gem5.opt \ - configs/example/gem5_library/riscv-ubuntu-run.py -``` -""" - -import m5 -from m5.objects import Root - -from gem5.utils.requires import requires -from riscv_dm_board import RiscvDMBoard -from dm_caches import ClassicPL1PL2DMCache -from mi_example_dm_caches import MIExampleDMCache -from gem5.components.memory import DualChannelDDR4_2400 -from gem5.components.memory.multi_channel import * -from gem5.components.processors.simple_processor import SimpleProcessor -from gem5.components.processors.cpu_types import CPUTypes -from gem5.isas import ISA -from gem5.simulate.simulator import Simulator -from gem5.resources.workload import Workload -from gem5.resources.workload import * -from gem5.resources.resource import * - -# This runs a check to ensure the gem5 binary is compiled for RISCV. - -requires(isa_required=ISA.RISCV) - -# With RISCV, we use simple caches. -# from gem5.components.cachehierarchies.classic.private_l1_private_l2_cache_hierarchy import ( -# PrivateL1PrivateL2CacheHierarchy, -# ) -## -# from gem5.components.cachehierarchies.classic.no_cache import NoCache - -# # Here we setup the parameters of the l1 and l2 caches. -# cache_hierarchy = PrivateL1PrivateL2CacheHierarchy( -# l1d_size="16kB", l1i_size="16kB", l2_size="256kB" -# ) -## -# from gem5.components.cachehierarchies.ruby.mi_example_cache_hierarchy import ( -# MIExampleCacheHierarchy -# ) - -# Here we setup the parameters of the l1 and l2 caches. -# cache_hierarchy = PrivateL1PrivateL2CacheHierarchy( -# l1d_size="16kB", l1i_size="16kB", l2_size="256kB" -# ) -# Here we setup the parameters of the l1 and l2 caches. 
- -# cache_hierarchy = ClassicPL1PL2DMCache( -# l1d_size="16kB", l1i_size="16kB", l2_size="256kB" -# ) - -cache_hierarchy = MIExampleDMCache(size="256kB", assoc=8) - -# cache_hierarchy = MIExampleCacheHierarchy( -# size="16kB", assoc="8" -# ) - -# cache_hierarchy = NoCache() - -# Memory: Dual Channel DDR4 2400 DRAM device. - -# local_memory = DualChannelDDR4_2400(size="512MB") -local_memory = DualChannelDDR4_2400(size="512MB") -remote_memory = DualChannelDDR4_2400(size="2GB") - -# remote_memory = DualChannelHBM_1000(size="4GB") - -# Here we setup the processor. We use a simple processor. -processor = SimpleProcessor( - cpu_type=CPUTypes.TIMING, isa=ISA.RISCV, num_cores=4 -) - -# Here we setup the board. The RiscvBoard allows for Full-System RISCV -# simulations. -board = RiscvDMBoard( - clk_freq="3GHz", - processor=processor, - local_memory=local_memory, - remote_memory=remote_memory, - cache_hierarchy=cache_hierarchy, -) - -workload = CustomWorkload( - function="set_kernel_disk_workload", - parameters={ - # "disk_image" : DiskImageResource(os.path.join(os.getcwd(), - # "arm64-ubuntu-numa"), - # root_partition = "1"), - "disk_image": CustomDiskImageResource( - local_path=os.path.join( - os.getcwd(), - # "/home/kaustavg/ubuntu-numa.img"), - # "/home/kaustavg/ubuntu-numa-bench.img"), - "/home/kaustavg/disk-images/rv64gc-hpc-2204.img", - ), - # local_path = "/home/kaustavg/kernel/gem5-resources/src/riscv-ubuntu/disk-image/base/ubuntu-ML.img", - # "/scr/kaustavg/simulators-at-scratch/DArchR/WorkingDir/gem5-simpoint/gem5x/gem5/riscv-ubuntu-20221118.img"), - disk_root_partition="1", - ), - # root_partition = "1"), - "kernel": CustomResource( - os.path.join( - os.getcwd(), - # "x86-linux-kernel-5.4.49")) - # "/scr/kaustavg/simulators-at-scratch/DArchR/WorkingDir/gem5-simpoint/gem5x/gem5/riscv-bootloader-vmlinux-5.10")) - "/scr/kaustavg/simulators-at-scratch/DArchR/WorkingDir/gem5-simpoint/gem5x/gem5/bbl", - ) - ), - # "bootloader": CustomResource(os.path.join(os.getcwd(), 
- # "vmlinux-5.4.49-NUMA.riscv")) - }, -) - -# Here we a full system workload: "riscv-ubuntu-20.04-boot" which boots -# Ubuntu 20.04. Once the system successfully boots it encounters an `m5_exit` -# instruction which stops the simulation. When the simulation has ended you may -# inspect `m5out/system.pc.com_1.device` to see the stdout. -# board.set_workload(Workload("riscv-ubuntu-20.04-boot")) -board.set_workload(workload) -simulator = Simulator(board=board) -simulator.run() -simulator.run() diff --git a/disaggregated_memory_setup/numa_config_x86.py b/disaggregated_memory_setup/numa_config_x86.py deleted file mode 100644 index e50534c03e..0000000000 --- a/disaggregated_memory_setup/numa_config_x86.py +++ /dev/null @@ -1,129 +0,0 @@ -# Copyright (c) 2021 The Regents of the University of California. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer; -# redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution; -# neither the name of the copyright holders nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -""" -Script to run GAPBS benchmarks with gem5. The script expects the -benchmark program and the simulation size to run. The input is in the format - -The system is fixed with 2 CPU cores, MESI Two Level system cache and 3 GB -DDR4 memory. It uses the x86 board. - -This script will count the total number of instructions executed -in the ROI. It also tracks how much wallclock and simulated time. - -Usage: ------- - -``` -scons build/X86/gem5.opt -./build/X86/gem5.opt \ - configs/example/gem5_library/x86-gabps-benchmarks.py \ - --benchmark \ - --synthetic \ - --size -``` -""" - -import argparse -import time -import sys - -import m5 -from m5.objects import Root - -from gem5.utils.requires import requires -from gem5.components.boards.x86_board import X86Board -from gem5.components.memory import DualChannelDDR4_2400, SingleChannelDDR4_2400 - -# from gem5.components.processors.simple_switchable_processor import ( -# SimpleSwitchableProcessor, -# ) -from x86_dm_board import X86DMBoard -from gem5.components.processors.simple_processor import SimpleProcessor -from gem5.components.processors.cpu_types import CPUTypes -from gem5.isas import ISA -from gem5.coherence_protocol import CoherenceProtocol -from gem5.resources.resource import obtain_resource -from gem5.simulate.simulator import Simulator -from gem5.simulate.exit_event import ExitEvent -from dm_caches import ClassicPL1PL2DMCache - -requires( - isa_required=ISA.X86, - kvm_required=True, -) - 
-# Following are the list of benchmark programs for gapbs - - -cache_hierarchy = ClassicPL1PL2DMCache( - l1d_size="2MB", l1i_size="2MB", l2_size="4MB" -) -# Memory: Dual Channel DDR4 2400 DRAM device. -# The X86 board only supports 3 GB of main memory. - -local_memory = SingleChannelDDR4_2400(size="1GB") -remote_mem_size = "1GiB" -# remote_memory = SingleChannelDDR4_2400(size="3GB") - -# Here we setup the processor. This is a special switchable processor in which -# a starting core type and a switch core type must be specified. Once a -# configuration is instantiated a user may call `processor.switch()` to switch -# from the starting core types to the switch core types. In this simulation -# we start with KVM cores to simulate the OS boot, then switch to the Timing -# cores for the command we wish to run after boot. - -processor = SimpleProcessor(cpu_type=CPUTypes.TIMING, isa=ISA.X86, num_cores=1) - -# Here we setup the board. The X86Board allows for Full-System X86 simulations - -board = X86DMBoard( - clk_freq="3GHz", - processor=processor, - memory=local_memory, - remote_memory_size=remote_mem_size, - # remote_memory=remote_memory, - cache_hierarchy=cache_hierarchy, -) - -# Here we set the FS workload, i.e., gapbs benchmark program -# After simulation has ended you may inspect -# `m5out/system.pc.com_1.device` to the stdout, if any. - -board.set_kernel_disk_workload( - # The x86 linux kernel will be automatically downloaded to the - # `~/.cache/gem5` directory if not already present. - # gapbs benchamarks was tested with kernel version 4.19.83 - kernel=obtain_resource("x86-linux-kernel-4.19.83"), - # The x86-gapbs image will be automatically downloaded to the - # `~/.cache/gem5` directory if not already present. 
- disk_image=obtain_resource("x86-ubuntu-18.04-img"), - # readfile_contents=command, -) -board._pre_instantiate() -root = Root(full_system=True, system=board) -# simulator = Simulator(board=board) -# simulator.run() diff --git a/disaggregated_memory_setup/riscv_sst_board.py b/disaggregated_memory_setup/riscv_sst_board.py deleted file mode 100644 index 4c0513b704..0000000000 --- a/disaggregated_memory_setup/riscv_sst_board.py +++ /dev/null @@ -1,514 +0,0 @@ -# Copyright (c) 2023 The Regents of the University of California -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer; -# redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution; -# neither the name of the copyright holders nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import os - -from typing import List, Optional, Sequence, Tuple - -from gem5.components.boards.riscv_board import RiscvBoard - -from gem5.components.boards.abstract_board import AbstractBoard -from gem5.utils.override import overrides -from gem5.resources.resource import AbstractResource -from gem5.components.boards.kernel_disk_workload import KernelDiskWorkload -from gem5.components.boards.abstract_system_board import AbstractSystemBoard -from gem5.components.processors.abstract_processor import AbstractProcessor -from gem5.components.memory.abstract_memory_system import AbstractMemorySystem -from gem5.components.cachehierarchies.abstract_cache_hierarchy import ( - AbstractCacheHierarchy, -) - -from gem5.isas import ISA - -import m5 - -from m5.objects import ( - AddrRange, - HiFive, - Frequency, - Port, - OutgoingRequestBridge, - NoncoherentXBar, -) - -from m5.util.fdthelper import ( - Fdt, - FdtNode, - FdtProperty, - FdtPropertyStrings, - FdtPropertyWords, - FdtState, -) - - -class RiscvSstBoard(RiscvBoard): - """ - A board capable of full system simulation for RISC-V - At a high-level, this is based on the HiFive Unmatched board from SiFive. - This board assumes that you will be booting Linux. 
- - **Limitations** - * Only works with classic caches - """ - - def __init__( - self, - clk_freq: str, - processor: AbstractProcessor, - local_memory: AbstractMemorySystem, - remote_memory_size: str, - cache_hierarchy: AbstractCacheHierarchy, - instance: int, - ) -> None: - self._localMemory = local_memory - # Since the remote memory is defined in SST's side, we only need the - # size of this memory while setting up stuff from Gem5's side. - self._remoteMemory = OutgoingRequestBridge() - self._remoteMemorySize = remote_memory_size - self._instanceCount = instance - super().__init__( - clk_freq=clk_freq, - processor=processor, - memory=local_memory, - cache_hierarchy=cache_hierarchy, - ) - self.local_memory = local_memory - # Do not setup the remote memory here. - self.remote_memory = self._remoteMemory - - if processor.get_isa() != ISA.RISCV: - raise Exception( - "The RISCVBoard requires a processor using the" - "RISCV ISA. Current processor ISA: " - f"'{processor.get_isa().name}'." - ) - - @overrides(AbstractSystemBoard) - def get_memory(self) -> "AbstractMemory": - """Get the memory (RAM) connected to the board. - - :returns: The memory system. - """ - raise NotImplementedError - - def get_local_memory(self) -> "AbstractMemory": - """Get the memory (RAM) connected to the board. - :returns: The local memory system. - """ - return self._localMemory - - def get_remote_memory(self) -> "AbstractMemory": - """Get the memory (RAM) connected to the board. - :returns: The remote memory system. 
- """ - # raise Exception("cannot call this method") - return self._remoteMemory - - @overrides(AbstractSystemBoard) - def get_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]: - return self.get_local_memory().get_mem_ports() - - def get_remote_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]: - - return [ - ( - self.get_remote_memory().physical_address_ranges, - self.get_remote_memory().port, - ) - ] - # return self.get_remote_memory().port # get_mem_ports() - - @overrides(AbstractSystemBoard) - def _setup_memory_ranges(self): - - # the memory has to be setup for both the memory ranges. there is one - # local memory range, close to the host machine and the other range is - # pure memory, far from the host. - local_memory = self.get_local_memory() - remote_memory = self.get_remote_memory() - - local_mem_size = local_memory.get_size() - # This is a string - remote_mem_size = self._remoteMemorySize - - self._local_mem_ranges = [ - AddrRange(start=0x80000000, size=local_mem_size) - ] - - # The remote memory starts where the local memory ends. Therefore it - # has to be offset by the local memory's size. - # self._remote_mem_ranges = [ - # AddrRange(start=0x80000000 + local_mem_size, size=remote_mem_size) - # ] - - # keeping a hole in the mem ranges to simulate multiple nodes without - # using a translator simobject. - remote_memory_start_addr = ( - 0x80000000 + local_mem_size + self._instanceCount * 0x80000000 - ) - self._remote_mem_ranges = [ - AddrRange(start=remote_memory_start_addr, size=remote_mem_size) - ] - - # using a _global_ memory range to keep a track of all the memory - # ranges. This is used to generate the dtb for this machine - self._global_mem_ranges = [] - self._global_mem_ranges.append(self._local_mem_ranges[0]) - self._global_mem_ranges.append(self._remote_mem_ranges[0]) - - # setting the memory ranges for both of the memory ranges. 
- local_memory.set_memory_range(self._local_mem_ranges) - remote_memory.physical_address_ranges = self._remote_mem_ranges - # remote_memory.set_memory_range(self._remote_mem_ranges) - - @overrides(RiscvBoard) - def generate_device_tree(self, outdir: str) -> None: - """Creates the dtb and dts files. - Creates two files in the outdir: 'device.dtb' and 'device.dts' - :param outdir: Directory to output the files - """ - - state = FdtState(addr_cells=2, size_cells=2, cpu_cells=1) - root = FdtNode("/") - root.append(state.addrCellsProperty()) - root.append(state.sizeCellsProperty()) - root.appendCompatible(["riscv-virtio"]) - - for idx, mem_range in enumerate(self._global_mem_ranges): - node = FdtNode("memory@%x" % int(mem_range.start)) - node.append(FdtPropertyStrings("device_type", ["memory"])) - node.append( - FdtPropertyWords( - "reg", - state.addrCells(mem_range.start) - + state.sizeCells(mem_range.size()), - ) - ) - # adding the NUMA node information so that the OS can identify all - # the NUMA ranges. - node.append(FdtPropertyWords("numa-node-id", [idx])) - root.append(node) - - # See Documentation/devicetree/bindings/riscv/cpus.txt for details. - cpus_node = FdtNode("cpus") - cpus_state = FdtState(addr_cells=1, size_cells=0) - cpus_node.append(cpus_state.addrCellsProperty()) - cpus_node.append(cpus_state.sizeCellsProperty()) - # Used by the CLINT driver to set the timer frequency. Value taken from - # RISC-V kernel docs (Note: freedom-u540 is actually 1MHz) - cpus_node.append(FdtPropertyWords("timebase-frequency", [100000000])) - - for i, core in enumerate(self.get_processor().get_cores()): - node = FdtNode(f"cpu@{i}") - node.append(FdtPropertyStrings("device_type", "cpu")) - node.append(FdtPropertyWords("reg", state.CPUAddrCells(i))) - # The CPUs are also associated to the NUMA nodes. All the CPUs are - # bound to the first NUMA node. 
- node.append(FdtPropertyWords("numa-node-id", [0])) - node.append(FdtPropertyStrings("mmu-type", "riscv,sv48")) - node.append(FdtPropertyStrings("status", "okay")) - node.append(FdtPropertyStrings("riscv,isa", "rv64imafdc")) - # TODO: Should probably get this from the core. - freq = self.clk_domain.clock[0].frequency - node.append(FdtPropertyWords("clock-frequency", freq)) - node.appendCompatible(["riscv"]) - int_phandle = state.phandle(f"cpu@{i}.int_state") - node.appendPhandle(f"cpu@{i}") - - int_node = FdtNode("interrupt-controller") - int_state = FdtState(interrupt_cells=1) - int_phandle = int_state.phandle(f"cpu@{i}.int_state") - int_node.append(int_state.interruptCellsProperty()) - int_node.append(FdtProperty("interrupt-controller")) - int_node.appendCompatible("riscv,cpu-intc") - int_node.append(FdtPropertyWords("phandle", [int_phandle])) - - node.append(int_node) - cpus_node.append(node) - - root.append(cpus_node) - - soc_node = FdtNode("soc") - soc_state = FdtState(addr_cells=2, size_cells=2) - soc_node.append(soc_state.addrCellsProperty()) - soc_node.append(soc_state.sizeCellsProperty()) - soc_node.append(FdtProperty("ranges")) - soc_node.appendCompatible(["simple-bus"]) - - # CLINT node - clint = self.platform.clint - clint_node = clint.generateBasicPioDeviceNode( - soc_state, "clint", clint.pio_addr, clint.pio_size - ) - int_extended = list() - for i, core in enumerate(self.get_processor().get_cores()): - phandle = soc_state.phandle(f"cpu@{i}.int_state") - int_extended.append(phandle) - int_extended.append(0x3) - int_extended.append(phandle) - int_extended.append(0x7) - clint_node.append( - FdtPropertyWords("interrupts-extended", int_extended) - ) - # NUMA information is also associated with the CLINT controller. - # In this board, the objective to associate one NUMA node to the CPUs - # and the other node with no CPUs. 
To generalize this, an additional - # CLINT controller has to be created on this board, which will make it - # completely NUMA, instead of just disaggregated NUMA-like board. - clint_node.append(FdtPropertyWords("numa-node-id", [0])) - clint_node.appendCompatible(["riscv,clint0"]) - soc_node.append(clint_node) - - # PLIC node - plic = self.platform.plic - plic_node = plic.generateBasicPioDeviceNode( - soc_state, "plic", plic.pio_addr, plic.pio_size - ) - - int_state = FdtState(addr_cells=0, interrupt_cells=1) - plic_node.append(int_state.addrCellsProperty()) - plic_node.append(int_state.interruptCellsProperty()) - - phandle = int_state.phandle(plic) - plic_node.append(FdtPropertyWords("phandle", [phandle])) - # Similar to the CLINT interrupt controller, another PLIC controller is - # required to make this board a general NUMA like board. - plic_node.append(FdtPropertyWords("numa-node-id", [0])) - plic_node.append(FdtPropertyWords("riscv,ndev", [plic.n_src - 1])) - - int_extended = list() - for i, core in enumerate(self.get_processor().get_cores()): - phandle = state.phandle(f"cpu@{i}.int_state") - int_extended.append(phandle) - int_extended.append(0xB) - int_extended.append(phandle) - int_extended.append(0x9) - - plic_node.append(FdtPropertyWords("interrupts-extended", int_extended)) - plic_node.append(FdtProperty("interrupt-controller")) - plic_node.appendCompatible(["riscv,plic0"]) - - soc_node.append(plic_node) - - # PCI - pci_state = FdtState( - addr_cells=3, size_cells=2, cpu_cells=1, interrupt_cells=1 - ) - pci_node = FdtNode("pci") - - if int(self.platform.pci_host.conf_device_bits) == 8: - pci_node.appendCompatible("pci-host-cam-generic") - elif int(self.platform.pci_host.conf_device_bits) == 12: - pci_node.appendCompatible("pci-host-ecam-generic") - else: - m5.fatal("No compatibility string for the set conf_device_width") - - pci_node.append(FdtPropertyStrings("device_type", ["pci"])) - - # Cell sizes of child nodes/peripherals - 
pci_node.append(pci_state.addrCellsProperty()) - pci_node.append(pci_state.sizeCellsProperty()) - pci_node.append(pci_state.interruptCellsProperty()) - # PCI address for CPU - pci_node.append( - FdtPropertyWords( - "reg", - soc_state.addrCells(self.platform.pci_host.conf_base) - + soc_state.sizeCells(self.platform.pci_host.conf_size), - ) - ) - - # Ranges mapping - # For now some of this is hard coded, because the PCI module does not - # have a proper full understanding of the memory map, but adapting the - # PCI module is beyond the scope of what I'm trying to do here. - # Values are taken from the ARM VExpress_GEM5_V1 platform. - ranges = [] - # Pio address range - ranges += self.platform.pci_host.pciFdtAddr(space=1, addr=0) - ranges += soc_state.addrCells(self.platform.pci_host.pci_pio_base) - ranges += pci_state.sizeCells(0x10000) # Fixed size - - # AXI memory address range - ranges += self.platform.pci_host.pciFdtAddr(space=2, addr=0) - ranges += soc_state.addrCells(self.platform.pci_host.pci_mem_base) - ranges += pci_state.sizeCells(0x40000000) # Fixed size - pci_node.append(FdtPropertyWords("ranges", ranges)) - - # Interrupt mapping - plic_handle = int_state.phandle(plic) - int_base = self.platform.pci_host.int_base - - interrupts = [] - - for i in range(int(self.platform.pci_host.int_count)): - interrupts += self.platform.pci_host.pciFdtAddr( - device=i, addr=0 - ) + [int(i) + 1, plic_handle, int(int_base) + i] - - pci_node.append(FdtPropertyWords("interrupt-map", interrupts)) - - int_count = int(self.platform.pci_host.int_count) - if int_count & (int_count - 1): - fatal("PCI interrupt count should be power of 2") - - intmask = self.platform.pci_host.pciFdtAddr( - device=int_count - 1, addr=0 - ) + [0x0] - pci_node.append(FdtPropertyWords("interrupt-map-mask", intmask)) - - if self.platform.pci_host._dma_coherent: - pci_node.append(FdtProperty("dma-coherent")) - - soc_node.append(pci_node) - - # UART node - uart = self.platform.uart - uart_node = 
uart.generateBasicPioDeviceNode( - soc_state, "uart", uart.pio_addr, uart.pio_size - ) - uart_node.append( - FdtPropertyWords("interrupts", [self.platform.uart_int_id]) - ) - uart_node.append(FdtPropertyWords("clock-frequency", [0x384000])) - uart_node.append( - FdtPropertyWords("interrupt-parent", soc_state.phandle(plic)) - ) - uart_node.appendCompatible(["ns8250"]) - soc_node.append(uart_node) - - # VirtIO MMIO disk node - disk = self.disk - disk_node = disk.generateBasicPioDeviceNode( - soc_state, "virtio_mmio", disk.pio_addr, disk.pio_size - ) - disk_node.append(FdtPropertyWords("interrupts", [disk.interrupt_id])) - disk_node.append( - FdtPropertyWords("interrupt-parent", soc_state.phandle(plic)) - ) - disk_node.appendCompatible(["virtio,mmio"]) - soc_node.append(disk_node) - - # VirtIO MMIO rng node - rng = self.rng - rng_node = rng.generateBasicPioDeviceNode( - soc_state, "virtio_mmio", rng.pio_addr, rng.pio_size - ) - rng_node.append(FdtPropertyWords("interrupts", [rng.interrupt_id])) - rng_node.append( - FdtPropertyWords("interrupt-parent", soc_state.phandle(plic)) - ) - rng_node.appendCompatible(["virtio,mmio"]) - soc_node.append(rng_node) - - root.append(soc_node) - - fdt = Fdt() - fdt.add_rootnode(root) - fdt.writeDtsFile(os.path.join(outdir, "device.dts")) - fdt.writeDtbFile(os.path.join(outdir, "device.dtb")) - - @overrides(KernelDiskWorkload) - def get_default_kernel_args(self) -> List[str]: - return ["console=ttyS0", "root={root_value}", "init=/bin/bash", "rw"] - - @overrides(AbstractBoard) - def _connect_things(self) -> None: - """Connects all the components to the board. - - The order of this board is always: - - 1. Connect the memory. - 2. Connect the cache hierarchy. - 3. Connect the processor. - - Developers may build upon this assumption when creating components. - - Notes - ----- - - * The processor is incorporated after the cache hierarchy due to a bug - noted here: https://gem5.atlassian.net/browse/GEM5-1113. 
Until this - bug is fixed, this ordering must be maintained. - * Once this function is called `_connect_things_called` *must* be set - to `True`. - """ - - if self._connect_things_called: - raise Exception( - "The `_connect_things` function has already been called." - ) - - # Incorporate the memory into the motherboard. - self.get_local_memory().incorporate_memory(self) - # Add a NoncoherentXBar here - - self.remote_link = NoncoherentXBar( - frontend_latency=0, - forward_latency=0, - response_latency=750, - width=256, - ) - self.get_remote_memory().port = self.remote_link.mem_side_ports - self.get_cache_hierarchy().membus.mem_side_ports = ( - self.remote_link.cpu_side_ports - ) - - # self.get_remote_memory().port = \ - # self.get_cache_hierarchy().membus.mem_side_ports - # self.get_remote_memory().incorporate_memory(self) - - # Incorporate the cache hierarchy for the motherboard. - if self.get_cache_hierarchy(): - self.get_cache_hierarchy().incorporate_cache(self) - - # Incorporate the processor into the motherboard. 
- self.get_processor().incorporate_processor(self) - - self._connect_things_called = True - - @overrides(RiscvBoard) - def get_default_kernel_args(self) -> List[str]: - return [ - "console=ttyS0", - "root={root_value}", - "rw", - "init=/root/gem5-init.sh" - # "init=\"mount -t sysfs - /sys; mount -t proc - /proc; m5 exit;\"" - # "init=/bin/bash" - ] - - @overrides(AbstractBoard) - def _post_instantiate(self): - """Called to set up anything needed after m5.instantiate""" - print( - "__ranges__", self.get_remote_memory().physical_address_ranges[0] - ) - self.get_processor()._post_instantiate() - if self.get_cache_hierarchy(): - self.get_cache_hierarchy()._post_instantiate() - self.get_local_memory()._post_instantiate() - # self.get_remote_memory()._post_instantiate() diff --git a/disaggregated_memory_setup/simulator_project.ipynb b/disaggregated_memory_setup/simulator_project.ipynb deleted file mode 100644 index e7756e81f5..0000000000 --- a/disaggregated_memory_setup/simulator_project.ipynb +++ /dev/null @@ -1,159 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [], - "source": [ - "# This script generates plots for the disaggregated memory project.\n", - "# Run this script from amarillo.cs.ucdavis.edu\n", - "\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "\n", - "plot_idx = 0\n", - "# overwrite this for each plot\n", - "data = np.zeros((3,4))\n", - "home_path = \"/scr/kaustavg/simulators-at-scratch/DArchR/WorkingDir/\" \\\n", - " + \"SST13/gem5/ext/sst/final_data\"\n", - "kernels = [\"Copy\", \"Scale\", \"Add\", \"Triad\"]\n", - "bar_width = float(1/4)" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "# Timing results" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Results 9\n", - "[0, 1, 2, 3] [[6.308304 
5.890346 7.01689 6.978944]\n", - " [6.008993 5.932203 7.050845 6.956073]\n", - " [1.881847 1.874148 2.094582 2.086612]]\n" - ] - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWoAAAEICAYAAAB25L6yAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAYGElEQVR4nO3df3RV5Z3v8ffHAMaiFYVchxIxuVoVf6KNFiu1nYo/iz+Hi+CPltY2HatdVudeq6OzBq/t7b0zVtuqvV1UrdpWQCuIo60/WnWqtxUMioIGf0xlagBrCCiCIgrf+8fZiUk4IeeQc5In4fNai0XO3s9++O69wufs85znPEcRgZmZpWuHvi7AzMy2zkFtZpY4B7WZWeIc1GZmiXNQm5klzkFtZpY4B7WZWeIc1JYMSRdJapD0vqTbCmhfLelXklokrZe0QNLETm0ek9Qsaa2k5ySd1mn/SEm3SFop6R1JSyVdLWlotj+yvtdJWi7pOkkV7fbt06m/6ZJ+2eOLYdaOg9pSsgL4LnBrdw0l7Q48CWwEDgRGANcDd0qa1K7pxcDIiPg4UA/8UtLIdn38CdgJOCoidgGOA4YBe7fr49CI2Bk4Fjgb+HoPztGsaIP6ugCzVhExB0BSHVDdTfNLgHXA+RGxOds2U9Jo4AeS7omc59v/E8BgYE9gJXAp8A5wbmsfEfE6uXDPV99SSU8AB23TCZptI99RW391HHBPu5BudRcwGti3dYOk+yVtAOYDjwMN2a4JwJw8feQl6QDgs8CzPSvdrDi+o7b+agS5u+LOVrbb/xJAREyUNJhcMI9pF8zDu+ijs2ckbQJWAzcDP+9J4WbFclBbf7UKGJln+8h2+9tExAfAbyVdLOnViLgPaOmij84Oj4hX82zfRG4opb3BwAcF9GlWMA99WH/1O+BMSZ1/hycDrwMvd3HcID56o/B3wBl5+ijUX4CaTttqgf/cxv7M8nJQWzIkDZJUCVQAFZIqJXX1qu96YFfgFkl/k7WdClwJ/I+ICEn7SzpJ0k6SBks6FzgG+Pesj+uAjwO3S9orq2FUNgXvkAJKng1clU0T3EHSBOAU4Nfbeg3M8nFQW0quAt4DLgfOzX6+Kl/DiGgBxgOVwIvkhjEuBc6LiNlZMwHTgTeBZnKzOc6KiGeyPlYDnyE3VDFf0jvA74G3gXxDHZ39T+CP5KYJrgH+BTgnIpYUc9Jm3ZG/OMDMLG2+ozYzS5yD2swscQ5qM7PEOajNzBJXlg+8jBgxImpqasrRtZnZgLRw4cJVEVGVb19ZgrqmpoaGhobuG5qZGQCSuvyglIc+zMwS56A2M0ucg9rMLHFePc/MtuqDDz6gqamJDRs29HUpA0JlZSXV1dUMHtx54cWuOajNbKuamprYZZddqKmpQVJfl9OvRQQtLS00NTVRW1tb8HEe+jCzrdqwYQPDhw93SJeAJIYPH170qxMHtZl1yyFdOttyLbsNakn7SVrU7s9aSd/elgLNzKx43Y5RR8RLwFgASRXAcmBuecsys1TVXP5ASftb9r+/2G0bSVx66aX84Ac/AODaa69l3bp1TJ8+nWnTpjFx4kQmTZrU1n7nnXdm3bp1LFu2jNraWq688kq++93vArBq1SpGjhzJN77xDW688ca2Y8aOHcv+++/PrFmzuqzj+9//PrfccgsVFRX8+Mc/5oQTTtjW0y5KsW8mHgv8R0T4q4ZsYJu+a4n7e7u0/W1ndtxxR+bMmcMVV1zBiBEjijq2traWBx54oC2o7777bg488MAObRobG9m0aRNPPPEE69evZ+jQoVv08+KLLzJr1ixeeOEFVqxYwYQJE3j55ZepqKjY
9hMrULFBPQWYmW+HpHqgHmD06NE9LMtsYCn1XSgUdic6UAwaNIj6+nquv/56vve97xV17Mc+9jHGjBlDQ0MDdXV1zJ49m8mTJ7NixYq2NjNnzuS8886jsbGRefPmcfbZZ2/Rz7x585gyZQo77rgjtbW17LPPPixYsICjjjqqx+fXnYKDWtIQ4FTginz7I2IGMAOgrq7OXxtjZoVZ8Wz3bWIzF545njETzuHkc+pZ+dZ7vPvuBp5veos16zfyny3reb7prbbmmwOeb3qL5SvXsuGDTYybMJEbfnY752yu5L0Pg02Vw1i17rW29rNnz+aRRx5h6dKl3HDDDXmDevny5YwbN67tcXV1NcuXL+/RqReqmFkfJwHPRMRfy1WMmVlXPr7Lzkz8uynceeuMjjvyzKLoPLPi6M9P4KknHuPB++ZwwilndNjX0NDAiBEjGD16NMceeyzPPvssq1evLnn9PVFMUE+li2EPM7PecO75F3Dv7F/w3nvvtm0bttvurH37rbbHb69Zw7Ddd+9w3OAhQxhz8FjumHETx518Wod9M2fOZOnSpdTU1LD33nuzdu1a7rnnHubOncvYsWMZO3YsDQ0NjBo1itdff73tuKamJkaNGlWeE+2koKCWNBQ4DphT3nLMzLq26267cfzE05k76xdt2+qOOpqH/m0uH2zcCMC8u+/kiKM+u8WxX6q/kG9fMZ1dd9utbdvmzZu56667WLx4McuWLWPZsmXMmzePmTNncsYZZ7Bo0SIWLVpEXV0dp556KrNmzeL999/ntdde45VXXuHII48s/0lT4Bh1RKwHhpe5FjPrBwp+E7OQsedt8KX6i5h1281tjz834UQaFz/HlJM/T0VFBdV71XLV96/b4rh99hvDPvuN6bDtiSeeYNSoUXziE59o23bMMcfw4osvsnLlSkaOHNm2/cADD2Ty5MkccMABDBo0iJtuuqlXZnwAKKL07/vV1dWFvzjA+rUST8+r2XBnSfuD3pv10djYyJgxY7pv2FkZgvr5zYWvj1GIQ6qHlbS/QuW7ppIWRkRdvvb+CLmZWeIc1GZmiXNQm5klzkFtZpY4B7WZWeIc1GZmifNXcZlZcUq9smD949020ajDubT+XL78TzcAcPtPb+Ddd9dzwaWX80+XfJNjJpzAcV/86BOH4/ar5qmXmlj++l84+TOH8vVv/QMXXXYVAGtWtzDhU/vzd+dMY9ZtP2s7plTLnF555ZXccccdrFmzhnXr1hV6FbbKd9RmlrwddxzCnN8+yprVLUUfO2r0Xvzh0YfbHj9y/73sve/+Hdp0XuY0n/bLnD744IN885vfZNOmTVu0O+WUU1iwYEHRdW6Ng9rMkjeoooL6c87klz/7SdHHVlbuxH/dZ19eeC73AZyH/m0ux088vUOb1mVOjz/+eObNm5e3n66WOe1s3LhxHT7RWAoOajPrFy6cNpnf3Hs376wt/ksYTjz1TB68bw5vrGhih4oKqvboGKSzZ89mypQpTJ06lZkz8689t3z5cvbcc8+2x6kuc2pm1me8zKmZWT/gZU7NzBKX6jKnxx57bFmHQTw9z8yKU+gX9W4ny5xu3ryZV199ld2zu/jLLruMO++8k3fffZfq6mq+9rWvMX369B6d84Bf5rQvvtreBgAvc9rGy5xu3ZIlS7j11lu57rotnxy64mVOzcx60UEHHVRUSG8LB7WZWeIc1GZmiXNQm5klrtBvIR8m6deSlkpqlHRUuQszM7OcQqfn/Qh4MCImSRoCfKyMNZmZWTvdBrWkXYFjgGkAEbER2FjesswsVQfffnBJ+1t83B3dtklhmdOWlhYmTZrE008/zbRp07jxxht7ctpFKeSOuhZoBn4u6VBgIXBxRORfC7CnSr3WLaWfv2pmvat1mdNTL2xht92HF3Vs6zKnrUFdyDKnQ4cO3aKfyspKrrnmGpYsWcKSJUu2/WS2QSFj1IOAw4H/GxGHAeuByzs3klQvqUFSQ3Nzc4nLNLPtWQrL
nA4dOpTx48dTWVlZdA09VUhQNwFNETE/e/xrcsHdQUTMiIi6iKirqqoqZY1mZn2+zGlf6nboIyLekPS6pP0i4iXgWODF8pdm26zUw0eFru1gVkbtlzntcFdb4DKnN137vxheVbXVZU5HjRrFV7/6VVavXt22dkcKCp318S3gV9mMjz8DXylfSZaaUq+XAl4zxbbNuedfwJSTP8dpk89p21bsMqdzf/8Ujz/y27Z97Zc5BdqWOR0xYgRXX301ADfffDN1dXmX4egVBQV1RCwC+q5KMzM6LnN6+lnnArllTn91y085ddJUBg8ZstVlTuvGHd3lMqetK+g99thjXHPNNTz66KOcccYZW/TTF7zMqZkVZfGXFxfWcIAtcwpQU1PD2rVr2bhxI/feey8PP/wwBxxwQInPcEsOajNL3rpX/l/bz8Or/gvzX1nRYf/fX/Id/v6S72xx3Kg9RzPn93/aYvtpk8/mtMlnc0j1MJ566qkO+yoqKnjjjTfy1rFs2bJtqL7nvNaHmVniHNRmZolzUJtZt8rxTVDbq225lg5qM9uqyspKWlpaHNYlEBG0tLQU/elGv5loZltVXV1NU1MTRS8N8dabJa/lryV+smh8Z6eS9leIyspKqqurizrGQW1mWzV48GBqa7fhS2Wnjyt5LSeV+EuC+8sHrzz0YWaWOAe1mVniHNRmZolzUJuZJc5BbWaWOAe1mVniHNRmZolzUJuZJc5BbWaWOAe1mVniHNRmZolzUJuZJc5BbWaWuIJWz5O0DHgH2AR8GBH+RnIzs15SzDKnfxsRq8pWiZmZ5eWhDzOzxBUa1AE8LGmhpPp8DSTVS2qQ1FD0N0GYmVmXCg3q8RFxOHAScKGkYzo3iIgZEVEXEXVVVVUlLdLMbHtWUFBHxPLs7zeBucCR5SzKzMw+0m1QSxoqaZfWn4HjgSXlLszMzHIKmfWxBzBXUmv7OyPiwbJWZWZmbboN6oj4M3BoL9RiZmZ5eHqemVniHNRmZolzUJuZJc5BbWaWOAe1mVniHNRmZolzUJuZJc5BbWaWOAe1mVniHNRmZolzUJuZJc5BbWaWOAe1mVniHNRmZolzUJuZJc5BbWaWOAe1mVniHNRmZolzUJuZJc5BbWaWuIKDWlKFpGcl3V/OgszMrKNi7qgvBhrLVYiZmeVXUFBLqga+CNxc3nLMzKyzQu+ofwhcBmzuqoGkekkNkhqam5tLUZuZmVFAUEuaCLwZEQu31i4iZkREXUTUVVVVlaxAM7PtXSF31EcDp0paBswCviDpl2WtyszM2nQb1BFxRURUR0QNMAV4NCLOLXtlZmYGeB61mVnyBhXTOCIeBx4vSyVmZpaX76jNzBLnoDYzS5yD2swscQ5qM7PEOajNzBLnoDYzS5yD2swscQ5qM7PEOajNzBLnoDYzS5yD2swscQ5qM7PEOajNzBLnoDYzS5yD2swscQ5qM7PEOajNzBLnoDYzS5yD2swscQ5qM7PEOajNzBLXbVBLqpS0QNJzkl6QdHVvFGZmZjmDCmjzPvCFiFgnaTDwpKTfRsRTZa7NzMwoIKgjIoB12cPB2Z8oZ1FmZvaRgsaoJVVIWgS8CTwSEfPztKmX1CCpobm5ucRlmpltvwoK6ojYFBFjgWrgSEkH5WkzIyLqIqKuqqqqxGWamW2/ipr1ERFvAY8BJ5alGjMz20Ihsz6qJA3Lft4JOA5YWua6zMwsU8isj5HA7ZIqyAX7XRFxf3nLMjOzVoXM+ngeOKwXajEzszz8yUQzs8Q5qM3MEuegNjNLnIPazCxxDmozs8Q5qM3MEuegNjNLnIPazCxxDmozs8Q5qM3MEuegNjNLnIPazCxxDmozs8Q5qM3MEuegNjNLnIPazCxxDmozs8Q5qM3MEuegNjNLnIPazCxx3X65raQ9gTuAPYAAZkTEj8pdmJlt3cG3H1zS/hZ/eXFJ+7PS6TaogQ+Bf4iIZyTtAiyU9EhEvFjm2szMyqq/PNl1O/QRESsj
4pns53eARmBUWaoxM7MtFDVGLakGOAyYn2dfvaQGSQ3Nzc0lKs/MzAoOakk7A/cA346ItZ33R8SMiKiLiLqqqqpS1mhmtl0rKKglDSYX0r+KiDnlLcnMzNrrNqglCbgFaIyI68pfkpmZtVfIrI+jgfOAxZIWZdv+MSJ+U7aqElbqd4lh+5wW1V/ebTdLQbdBHRFPAuqFWszMLA9/MtHMLHEOajOzxDmozcwS56A2M0ucg9rMLHEOajOzxDmozcwS56A2M0ucg9rMLHEOajOzxDmozcwS56A2M0ucg9rMLHEOajOzxDmozcwS56A2M0ucg9rMLHEOajOzxDmozcwS56A2M0tct0Et6VZJb0pa0hsFmZlZR4XcUd8GnFjmOszMrAvdBnVE/AFY3Qu1mJlZHiUbo5ZUL6lBUkNzc3OpujUz2+6VLKgjYkZE1EVEXVVVVam6NTPb7nnWh5lZ4hzUZmaJK2R63kzgT8B+kpoknV/+sszMrNWg7hpExNTeKMTMzPLz0IeZWeIc1GZmiXNQm5klzkFtZpY4B7WZWeIc1GZmiXNQm5klzkFtZpY4B7WZWeIc1GZmiXNQm5klzkFtZpY4B7WZWeIc1GZmiXNQm5klzkFtZpY4B7WZWeIc1GZmiXNQm5klzkFtZpY4B7WZWeIKCmpJJ0p6SdKrki4vd1FmZvaRboNaUgVwE3AScAAwVdIB5S7MzMxyCrmjPhJ4NSL+HBEbgVnAaeUty8zMWikitt5AmgScGBFfyx6fB3w6Ii7q1K4eqM8e7ge8VPpyy2oEsKqvi+jnfA1Lw9ex5/rjNdwrIqry7RhUqn8hImYAM0rVX2+T1BARdX1dR3/ma1gavo49N9CuYSFDH8uBPds9rs62mZlZLygkqJ8GPimpVtIQYApwX3nLMjOzVt0OfUTEh5IuAh4CKoBbI+KFslfW+/rtsE1CfA1Lw9ex5wbUNez2zUQzM+tb/mSimVniHNRmZokb0EEt6W8kzZL0H5IWSvqNpH37uq7+RNKVkl6Q9LykRZI+XeTxNZKWlKu+/kjS6ZJC0v5d7H9c0hZTyyRNk3Rj+StMj6Th2e/fIklvSFre7vGQrM2pxS5xIWmZpBHlqbp0SjaPOjWSBMwFbo+IKdm2Q4E9gJf7srb+QtJRwETg8Ih4P/uFHtLHZQ0EU4Ens7//uY9r6RciogUYCyBpOrAuIq5t3S9pUETcxwCdkTaQ76j/FvggIn7auiEingOelPSvkpZIWizpLABJn5f0B0kPZAtQ/VTSDpK+KumHrX1I+rqk63v9bPrGSGBVRLwPEBGrImKFpCMk/VHSc5IWSNolu3N+QtIz2Z/PdO5MUkV27Z/O7tC/0etn1Mck7QyMB84nN9UVSTtlr/waJc0FdmrX/iuSXpa0ADi6T4pOlKTbsv+n84F/af+KQ9IpkuZLelbS7yTtkW0fLunh7FXizYD68hwKNZCD+iBgYZ7tZ5J7Zj4UmAD8q6SR2b4jgW+RW3xq76ztXcApkgZnbb4C3Fq+spPyMLBnFhQ/kfS57GXmbODiiGi9hu8BbwLHRcThwFnAj/P0dz7wdkQcARwBfF1Sba+cSTpOAx6MiJeBFkmfAi4A3o2IMeTusD8FkP1eXk0uoMeT+720jqqBz0TEpZ22PwmMi4jDyK1PdFm2/Z+BJyPiQHKvuEf3WqU9MGCHPrZiPDAzIjYBf5X07+RCYy2wICL+DCBpJjA+In4t6VFgoqRGYHBELO6r4ntTRKzLguSz5F6hzAa+B6yMiKezNmsBJA0FbpQ0FtgE5Hsv4HjgkGz9GIBdgU8Cr5XzPBIzFfhR9vOs7PE+ZE9sEfG8pOez/Z8GHo+IZgBJs8l/Xbdnd2f/lzurBmZnT3ZD+Oh37BhyN2BExAOS1vROmT0zkIP6BWBSt6066jypvPXxzcA/AkuBn/ewrn4l+0/wOPC4pMXAhV00vQT4K7lXKjsAG/K0EfCtiHioDKUmT9LuwBeAgyUFuQ+QBfBs
nxbWv63vYvsNwHURcZ+kzwPTe6ugchjIQx+PAjtmq/oBIOkQ4C3grGy8tIrcM+yCrMmR2UfldyD38v1JgIiYT269k7OBmb13Cn1L0n6SPtlu01igERgp6YiszS6SBpG7O14ZEZuB88iFUGcPARe0DiNJ2je7E99eTAJ+ERF7RURNROxJ7k5vIbnfLSQdBByStZ8PfC4bVx0M/Le+KLqf2pWP1iT6crvtf+Cja30SsFsv17VNBuwddUSEpDOAH0r6Drk7vGXAt4GdgefI3c1cFhFvZFOlngZuJPdS9DFyY1it7gLGRkS/eKlUIjsDN0gaBnwIvEpuKdufZ9t3Ijc+PQH4CXCPpC8BD5L/TudmoAZ4JpuV0wycXt5TSMpU4P902nYPcBiwUza01kj23kpErMxmOPyJ3A3Got4qdACYDtydDW08CrS+F3I1MFPSC8Afgb/0TXnF8UfIM9nLo/8eERO72H8/cH1E/L436zIzG8hDHyUhaZikl4H3HNJm1hd8R21mljjfUZuZJc5BbWaWOAe1mVniHNRmZolzUJuZJe7/A8g7kct7rA82AAAAAElFTkSuQmCC", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "# O3 Results\n", - "\n", - "print(\"Results \", plot_idx)\n", - "plot_idx += 1\n", - "\n", - "plt.title(\"1 O3CPU\")\n", - "f = open(home_path + \"/m5out_node_O3_0/system.platform.terminal\", \"r\")\n", - "exp = 0\n", - "kernel = 0\n", - "for lines in f.read().split(\"\\n\"):\n", - " if \"Bandwidth\" in lines:\n", - " data[exp][kernel] = float(lines.split(\" \")[1])\n", - " kernel += 1\n", - " if kernel % 4 == 0:\n", - " exp += 1\n", - " kernel = 0\n", - "\n", - "print([i for i in range(4)], data)\n", - "\n", - "x_axis = np.arange(len(kernels))\n", - "plt.xticks(x_axis, kernels)\n", - "\n", - "plt.bar(x_axis, data[0], bar_width, label=\"NUMA-0\")\n", - "plt.bar(x_axis - bar_width, data[1], bar_width, label=\"NUMA-0,1\")\n", - "plt.bar(x_axis + bar_width, data[2], bar_width, label=\"NUMA-1\")\n", - "plt.legend()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Results 13\n", - "[0, 1, 2, 3] [[10.324512 10.065496 11.409131 11.371691]\n", - " [10.31722 10.129058 11.51054 11.468607]\n", - " [ 7.576464 7.489263 8.33215 8.295167]]\n" - ] - }, - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXAAAAEICAYAAABGaK+TAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAXt0lEQVR4nO3de5RU5Z3u8e9jAzZeRlR6HKQ13eMVvAST1uBljCfgJQY1RoaAxkhuJDF6Ep1ZXg4zR3KSrGROjGYUc7KIGjWJDaIiJs4ojsZRE0UbQZCLl4mMNhdtWhAREYXf+aM22DQNXV21+/LC81mrF1273v3Wr/Zqnnrrrb3fUkRgZmbp2aW7CzAzs9I4wM3MEuUANzNLlAPczCxRDnAzs0Q5wM3MEuUANzNLlAPckiHpEEnrJP22nXaDJd0v6W1J70j6o6QTWtzfX9KfJDVLWiXpKUknturjUElTJa3I+pkr6XJJFZJqJIWkNdnPYklXZfttuq9Xq/5uk/TDPI+HmQPcUnIT8Oz2Gkg6CPgTMA+oBfYHpgEzJB2fNVsDfBWoAvYG/gX4/abQzfqYCbwOHBURewF/D9QBe7Z4uH4RsQcwBvjfks7I40maFcsBbkmQNBpYBTzSTtMJwFMRMT4i3oqIdyLiBuA3FIKaiFgXES9GxEZAwAYKQb5P1sf3gT9HxOURsSzb58WIOD8iVrV+wIh4CpgPHFnm0zTrEAe49XiS/gr4P8DlRTQ/FZjaxva7gBMl9W3R71xgHXA/cHNEvJndNRy4u8jalE2/HAHMLmYfs7z0ar+JWbf7AXBLRDRKaq9tf2BZG9uXURiw7AMsAYiIoyVVAucCfVq03XcbfbS2AghgOXBVRDwiqaaI/cxy4QC3Hk3SEAoj4mOK3GUFMKCN7QOAjcDKlhsjYh1QL2mhpDkR8TzQvI0+WusfER+22rbpdu8Wv2+6/UERfZoVzVMo1tOdAtQAr0laDvwjcJ6k57bR/j8ofODY2igKc+Nrt7Ffb+BvW/RxXon1LqMQ1DWtttcC/11in2ZtcoBbTzcJOAgYkv38EngAOH0b7b8PnCDpR5L2kbSnpEuBLwNXAkgaKukkSX0k9ZV0JbAfhTNPAK7J+vippL/J9jlY0m8l9dtesRGxAbgH+JGkfSX1ljQGGAz8e4nHwKxNDnDr0SJibUQs3/RD4RTAdRHRtI32LwMnAR8HFlMYEZ8HnB4Rf8qa7UrhlMRmCvPhZwKfi4ilWR//BRxPYRQ9X9LbFEK5AXiniLIvBt4C5gJvApdk/b/RsWdvtn3yFzqYmaXJI3Azs0Q5wM3MEuUANzNLVLsBLulWSW9KeqHFtp9KWpQt8DOtvU/mzcwsf+1+iCnpZAqf/N8REUdm204DHo2IDyVtWl/iyvYerH///lFTU1N20WZmO5NZs2atiIiq1tvbvRIzIh5vfXlwRMxocfNpYGQxRdTU1NDQ0FBMUzMzy0hq8yKwPObAv8p2LlCQNE5Sg6SGpqY2T901M7MSlBXgksZTWO/hd9tqExGTIqIuIuqqqrZ6B2BmZiUqeTErSWOBEcCw8NVAZmZdrqQAz7555Arg09tZHMjMdmAffPABjY2NrFu3rrtL2WFUVlZSXV1N7969i2rfboBLqqewIlx/SY0UFvq5msJ6Eg9n6zM/HRHfKrVoM0tPY2Mje+65JzU1NRSxTru1IyJobm6msbGR2traovYp5iyUMW1svqWjxZnZjmXdunUO7xxJYt9996UjJ3v4SkwzK5nDO18dPZ4OcDOzRPkr1cwsFzVXPZBrf4t/8rl220ji8ssv52c/+xkA1157LWvWrGHChAmMHTuWESNGMHLkR9cZ7rHHHqxZs4bFixdTW1vL+PHj+eEPfwjAihUrGDBgAN/85jeZOHHi5n2GDBnC4YcfzuTJk7dZx49//GNuueUWKioquOGGGzj99G1930i+HOBmHTFhr07o8+38+9wRLJ3dbpNdd+3DvVMnc/XVV9O/f/8OdV9
bW8sDDzywOcCnTp3KEUccsUWbhQsXsmHDBp544gneffdddt999636WbBgAZMnT2b+/PksXbqU4cOH89JLL1FRUdGhekrhKRQzS1avigrGXfAFrr/++g7vu9tuuzFo0KDNy3tMmTKFUaNGbdGmvr6eCy+8kNNOO43p06e32c/06dMZPXo0u+66K7W1tRx88ME888wzHX8yJfAI3KybdcfUw47kO2NHMWj4BZx5wTiWrXqPtWvXMbdxFSvfXc9/N7/L3MZVm9tuDJjbuIoly1az7oMNDB0+ght/dTsXbKzkvQ+DDZX9WLHm1c3tp0yZwsMPP8yiRYu48cYbOf/887d6/CVLljB06NDNt6urq1myZEmnPudNPAI3s6T91Z57MOK80dx566Qt72jjjI7WZ3mceMpwnn7ijzx4/72cfta5W9zX0NBA//79OfDAAxk2bBizZ8/mrbfeyr3+cjjAzSx5X/rat7lvym94772PLgzvt/c+rH571ebbb69cSb999tliv959+jDoqCHcMekmTj3znC3uq6+vZ9GiRdTU1HDQQQexevVq7rnnHqZNm8aQIUMYMmQIDQ0NDBw4kNdff33zfo2NjQwcOLBznmgrDnAzS95ee+/NaSM+z7TJv9m8re74E3no99P4YP16AKZPvZNjj/+7rfb98rjv8L2rJ7DX3ntv3rZx40buuusu5s2bx+LFi1m8eDHTp0+nvr6ec889lzlz5jBnzhzq6uo4++yzmTx5Mu+//z6vvvoqL7/8Mscdd1znP2k8B25mOenQ3HsRZ5h01JfHXcLk227efPvTw89g4bznGX3mKVRUVFD9sVr+6cfXbbXfwYcN4uDDBm2x7YknnmDgwIHsv//+m7edfPLJLFiwgGXLljFgwIDN24844ghGjRrF4MGD6dWrFzfddFOXnIECRXwjT57q6urCX+hgSeuE0whr1t2Za39d9SHmwoULGTRoUPsN25JzgM/dWNzaIcU6urpfrv11RFvHVdKsiKhr3dZTKGZmiXKAm5klygFuZpYoB7iZWaIc4GZmiXKAm5klyueBm1k+8j7Fctxj7TbRwE9w+bgvcdE/3wjA7b+8kbVr3+Xbl1/FP192MScPP51TP/fRFZZDD6vm6RcbWfL6a5x5wsf5xqX/wCVX/BMAK99qZvgnD+e8C8Yy+bZfbd4nr+Vkx48fzx133MHKlStZs2ZNsUdhuzwCN7Nk7bprH+7990dZ+VZzh/cdeODHePzRGZtvP/yH+zjo0MO3aNN6Odm2tFxO9sEHH+Tiiy9mw4YNW7U766yzcl+l0AFuZsnatJzsb3/1iw7vW1nZl789+FDmP1+4qOih30/jtBGf36JNnsvJDh06dIsrOPPgADezpH1n7Cj+7b6pvLO641+MccbZX+DB++9l+dJGdqmooGq/LQN2ypQpjB49mjFjxlBfX99mH0uWLOGAAw7YfNvLyZqZFcnLyZqZJczLyZqZJaqnLic7bNiwTp1OSec0wpxPUcp7BTjY+b7KymwLHfly5p1gOdmNGzfyyiuvsE826r/iiiu48847Wbt2LdXV1Xz9619nwoQJZT3ndJaTdYBbT+DlZDfzcrLb98ILL3Drrbdy3XVbv2hsj5eTNTPrZkceeWSHw7uj2g1wSbdKelPSCy227SPpYUkvZ//uvb0+zMwsf8XMgd8GTATuaLHtKuCRiPiJpKuy21fmX57lqhPe/ndo3tPMctVugEfE45JqWm0+Bzgl+/124DEc4DulmqseyLU/f45gVrxS58D3i4hl2e/Lgf221VDSOEkNkhqamppKfDgzM2ut7A8xo3AayzZPZYmISRFRFxF1VVVV5T6cmZllSj0P/A1JAyJimaQBwJt5FmVm6Tnq9qNy7W/eqXe026YnLCfb3NzMyJEjefbZZxk7diwTJ04s52l3SKkj8PuBi7LfLwLaXqbLzKwT9YTlZCsrK/nBD37Atdde2+EaylXMaYT1wFPAYZIaJX0N+AlwqqSXgeH
ZbTOzLtUTlpPdfffdOemkk6isrOxwDeVqN8AjYkxEDIiI3hFRHRG3RERzRAyLiEMiYnhE9Kwlusxsp9Hdy8l2J1+JaWZJ83KyZmYJ687lZLuTA9zMktedy8l2p3SWkzWzHm3eRfOKb7wDLScLUFNTw+rVq1m/fj333XcfM2bMYPDgwTk/w605wM0sWWte/tPm3/et+mtmvrx0i/u/ddmVfOuyrVf5GHjAgdz7yFNbbT9n1PmcM+p8jq7ux9NPP73FfRUVFSxfvrzNOhYvXlxC9eXzFIqZWaIc4GZmiXKAm1nJuvIbvXYGHT2eDnAzK0llZSXNzc0O8ZxEBM3NzR26otMfYppZSaqrq2lsbKSkZaJX5bv+3Rs5v4gsfKdvrv0Vq7Kykurq6qLbO8DNrCS9e/emtrbELxOeMDTXWj6b6BdDl8tTKGZmiXKAm5klygFuZpYoB7iZWaIc4GZmiXKAm5klygFuZpYoB7iZWaIc4GZmiXKAm5klygFuZpYoB7iZWaIc4GZmifJqhGY7mKNuPyrX/jr0ZcXWpRzgZmat5P0iCJ3zQugpFDOzRJUV4JIukzRf0guS6iUV/11AZmZWlpIDXNJA4H8CdRFxJFABjM6rMDMz275yp1B6AX0l9QJ2A5aWX5KZmRWj5ACPiCXAtcBrwDLg7YiY0bqdpHGSGiQ1lPTlp2Zm1qZyplD2Bs4BaoH9gd0lfal1u4iYFBF1EVFXVVVVeqVmZraFcqZQhgOvRkRTRHwA3AuckE9ZZmbWnnIC/DVgqKTdJAkYBizMpywzM2tPOXPgM4G7geeAeVlfk3Kqy8zM2lHWlZgRcQ1wTU61mJlZB/hKTDOzRHktlBx5ESEz60oegZuZJcojcOtR/C7GrHgegZuZJcoBbmaWKAe4mVmiHOBmZolygJuZJcoBbmaWKAe4mVmiHOBmZolygJuZJcoBbmaWKAe4mVmiHOBmZolygJuZJcoBbmaWKAe4mVmiHOBmZolygJuZJcoBbmaWKAe4mVmiHOBmZolygJuZJcoBbmaWKAe4mVmiygpwSf0k3S1pkaSFko7PqzAzM9u+XmXu/6/AgxExUlIfYLccajIzsyKUHOCS9gJOBsYCRMR6YH0+ZZmZWXvKmUKpBZqAX0uaLelmSbu3biRpnKQGSQ1NTU1lPJyZmbVUToD3Aj4B/L+IOAZ4F7iqdaOImBQRdRFRV1VVVcbDmZlZS+UEeCPQGBEzs9t3Uwh0MzPrAiUHeEQsB16XdFi2aRiwIJeqzMysXeWehXIp8LvsDJS/AF8pvyQzMytGWQEeEXOAunxKMTOzjvCVmGZmiXKAm5klygFuZpYoB7iZWaIc4GZmiXKAm5klygFuZpYoB7iZWaIc4GZmiXKAm5klygFuZpYoB7iZWaIc4GZmiXKAm5klygFuZpYoB7iZWaIc4GZmiXKAm5klygFuZpYoB7iZWaIc4GZmiXKAm5klygFuZpYoB7iZWaIc4GZmiXKAm5klygFuZpaosgNcUoWk2ZL+kEdBZmZWnDxG4N8FFubQj5mZdUBZAS6pGvgccHM+5ZiZWbHKHYH/HLgC2LitBpLGSWqQ1NDU1FTmw5mZ2SYlB7ikEcCbETFre+0iYlJE1EVEXVVVVakPZ2ZmrZQzAj8ROFvSYmAy8BlJv82lKjMza1fJAR4RV0dEdUTUAKOBRyPiS7lVZmZm2+XzwM3MEtUrj04i4jHgsTz6MjOz4ngEbmaWKAe4mVmiHOBmZolygJuZJcoBbmaWKAe4mVmiHOBmZolygJuZJcoBbmaWKAe4mVmiHOBmZolygJuZJcoBbmaWKAe4mVmiHOBmZolygJuZJcoBbmaWKAe4mVmiHOBmZolygJuZJcoBbmaWKAe4mVmiHOBmZolygJuZJcoBbmaWKAe4mVmiHOBmZokqOcAlHSDpj5IWSJov6bt5FmZmZtvXq4x9PwT+ISKek7QnMEvSwxGxIKfazMxsO0oegUfEsoh4Lvv9HWAhMDC
vwszMbPtymQOXVAMcA8xs475xkhokNTQ1NeXxcGZmRg4BLmkP4B7gexGxuvX9ETEpIuoioq6qqqrchzMzs0xZAS6pN4Xw/l1E3JtPSWZmVoxyzkIRcAuwMCKuy68kMzMrRjkj8BOBC4HPSJqT/ZyZU11mZtaOkk8jjIgnAeVYi5mZdYCvxDQzS5QD3MwsUQ5wM7NEOcDNzBLlADczS5QD3MwsUQ5wM7NEOcDNzBLlADczS5QD3MwsUQ5wM7NEOcDNzBLlADczS5QD3MwsUQ5wM7NEOcDNzBLlADczS5QD3MwsUQ5wM7NEOcDNzBLlADczS5QD3MwsUQ5wM7NEOcDNzBLlADczS5QD3MwsUQ5wM7NElRXgks6Q9KKkVyRdlVdRZmbWvpIDXFIFcBPwWWAwMEbS4LwKMzOz7StnBH4c8EpE/CUi1gOTgXPyKcvMzNqjiChtR2kkcEZEfD27fSHwqYi4pFW7ccC47OZhwIull9tt+gMruruIxPkYls/HsHypHsOPRURV6429OvtRI2ISMKmzH6czSWqIiLruriNlPobl8zEs3452DMuZQlkCHNDidnW2zczMukA5Af4scIikWkl9gNHA/fmUZWZm7Sl5CiUiPpR0CfAQUAHcGhHzc6usZ0l6CqiH8DEsn49h+XaoY1jyh5hmZta9fCWmmVmiHOBmZonaaQNc0t9ImizpvyTNkvRvkg7t7rpSIWm8pPmS5kqaI+lTHdy/RtILnVVfiiR9XlJIOnwb9z8maatT4CSNlTSx8yvseSTtm/39zZG0XNKSFrf7ZG3O7uhSH5IWS+rfOVXnp9PPA++JJAmYBtweEaOzbR8H9gNe6s7aUiDpeGAE8ImIeD/7Q+/TzWXtCMYAT2b/XtPNtSQhIpqBIQCSJgBrIuLaTfdL6hUR97ODniG3s47A/wfwQUT8ctOGiHgeeFLSTyW9IGmepC8CSDpF0uOSHsgW7/qlpF0kfVXSzzf1Iekbkq7v8mfT9QYAKyLifYCIWBERSyUdK+nPkp6X9IykPbOR9hOSnst+TmjdmaSK7Lg/m43ov9nlz6ibSdoDOAn4GoVTcpHUN3uXuFDSNKBvi/ZfkfSSpGeAE7ul6B5K0m3Z/9GZwP9t+Q5F0lmSZkqaLek/JO2Xbd9X0ozsXeXNgLrzORRrZw3wI4FZbWz/AoVX848Dw4GfShqQ3XcccCmFhbsOytreBZwlqXfW5ivArZ1Xdo8xAzggC5BfSPp09nZ1CvDdiNh0/N4D3gROjYhPAF8Ebmijv68Bb0fEscCxwDck1XbJM+k5zgEejIiXgGZJnwS+DayNiEEURuSfBMj+Jr9PIbhPovA3aVuqBk6IiMtbbX8SGBoRx1BYv+mKbPs1wJMRcQSFd+cHdlmlZdgpp1C24ySgPiI2AG9I+k8KgbIaeCYi/gIgqR44KSLulvQoMELSQqB3RMzrruK7SkSsyQLm7yi8m5kC/AhYFhHPZm1WA0jaHZgoaQiwAWjrc4bTgKOz9XUA9gIOAV7tzOfRw4wB/jX7fXJ2+2CyF7yImCtpbnb/p4DHIqIJQNIU2j6uO7Op2f/j1qqBKdmLYB8++hs7mcKgjIh4QNLKrimzPDtrgM8HRrbbakutT5jfdPtm4H8Bi4Bfl1lXMrL/HI8Bj0maB3xnG00vA96g8K5mF2BdG20EXBoRD3VCqT2epH2AzwBHSQoKF8YFMLtbC0vbu9vYfiNwXUTcL+kUYEJXFdQZdtYplEeBXbOVEgGQdDSwCvhiNidbReFV+ZmsyXHZsgG7UJgKeBIgImZSWBPmfKC+655C95F0mKRDWmwaAiwEBkg6Nmuzp6ReFEbTyyJiI3AhhXBq7SHg25umoiQdmo3cdxYjgd9ExMcioiYiDqAwMpxF4e8KSUcCR2ftZwKfzuZtewN/3x1FJ2ovPlqz6aIW2x/no2P9WWDvLq6rJDvlCDwiQtK5wM8lXUlhVLgY+B6wB/A8hRHQFRGxPDut61lgIoW3tX+
kME+2yV3AkIhI4m1XDvYAbpTUD/gQeIXCksG/zrb3pTD/PRz4BXCPpC8DD9L2yOhmoAZ4LjtDqAn4fOc+hR5lDPAvrbbdAxwD9M2m5xaSfW4TEcuyMy6eojDomNNVhe4AJgBTsymSR4FNn7V8H6iXNB/4M/Ba95TXMb6UvgjZW61/jIgR27j/D8D1EfFIV9ZlZju3nXUKJReS+kl6CXjP4W1mXc0jcDOzRHkEbmaWKAe4mVmiHOBmZolygJuZJcoBbmaWqP8Pz0bgU31bkeQAAAAASUVORK5CYII=", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "# O3 Results\n", - "\n", - "print(\"Results \", plot_idx)\n", - "plot_idx += 1\n", - "\n", - "plt.title(\"4 O3CPU\")\n", - "f = open(home_path + \"/m5out_node_0/system.platform.terminal\", \"r\")\n", - "exp = 0\n", - "kernel = 0\n", - "for lines in f.read().split(\"\\n\"):\n", - " if \"Bandwidth\" in lines:\n", - " data[exp][kernel] = float(lines.split(\" \")[1])\n", - " kernel += 1\n", - " if kernel % 4 == 0:\n", - " exp += 1\n", - " kernel = 0\n", - "\n", - "print([i for i in range(4)], data)\n", - "\n", - "x_axis = np.arange(len(kernels))\n", - "plt.xticks(x_axis, kernels)\n", - "\n", - "plt.bar(x_axis, data[0], bar_width, label=\"NUMA-0\")\n", - "plt.bar(x_axis - bar_width, data[1], bar_width, label=\"NUMA-0,1\")\n", - "plt.bar(x_axis + bar_width, data[2], bar_width, label=\"NUMA-1\")\n", - "plt.legend()\n", - "plt.show()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/disaggregated_memory_setup/traffic_gen_sst.py b/disaggregated_memory_setup/traffic_gen_sst.py deleted file mode 100644 index 269ff8c13e..0000000000 --- a/disaggregated_memory_setup/traffic_gen_sst.py +++ /dev/null @@ -1,136 +0,0 @@ -# Copyright (c) 2021-2023 The Regents of the University of California -# All rights reserved. 
-# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer; -# redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution; -# neither the name of the copyright holders nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -""" -This script is used for running a traffic generator connected to a memory -device. It supports linear and random accesses with a configurable amount -of write traffic. - -By default, this scripts runs with one channel (two pseudo channels) of HBM2 -and this channel is driven with 32GiB/s of traffic for 1ms. 
-""" - -import argparse - -from m5.objects import MemorySize, AddrRange - -# from gem5.components.boards.test_board import TestBoard - -from test_board_sst import TestBoardForSST - -from gem5.components.processors.linear_generator import LinearGenerator -from gem5.components.processors.random_generator import RandomGenerator - -from gem5.components.memory.hbm import HighBandwidthMemory -from gem5.components.memory.dram_interfaces.hbm import HBM_2000_4H_1x64 - -from gem5.simulate.simulator import Simulator - -# For hooking up SST with this system. -from m5.objects import OutgoingRequestBridge - - -def generator_factory( - generator_class: str, rd_perc: int, mem_size: MemorySize -): - rd_perc = int(rd_perc) - if rd_perc > 100 or rd_perc < 0: - raise ValueError( - "Read percentage has to be an integer number between 0 and 100." - ) - if generator_class == "LinearGenerator": - return LinearGenerator( - duration="1ms", rate="32GiB/s", max_addr=mem_size, rd_perc=rd_perc - ) - elif generator_class == "RandomGenerator": - return RandomGenerator( - duration="1ms", rate="32GiB/s", max_addr=mem_size, rd_perc=rd_perc - ) - else: - raise ValueError(f"Unknown generator class {generator_class}") - - -parser = argparse.ArgumentParser( - description="A traffic generator that can be used to test a gem5 " - "memory component." -) - -parser.add_argument( - "--generator-class", - type=str, - help="The class of generator to use.", - choices=[ - "LinearGenerator", - "RandomGenerator", - ], - default="LinearGenerator", -) - -parser.add_argument( - "--memory-size", type=str, help="Memory size as a string", default="1GiB" -) - -parser.add_argument( - "--read-percentage", - type=int, - help="Percentage of read requests in the generated traffic.", - default=100, -) - - -args = parser.parse_args() - -# Single pair of HBM2 pseudo channels. 
This can be replaced with any -# single ported memory device -# memory = HighBandwidthMemory(HBM_2000_4H_1x64, 1, 128) -memory_size = args.memory_size -# sst_memory = OutgoingRequestBridge(physical_address_ranges = AddrRange(start = 0x0, size = memory_size)) - -# print("mem-size: ", str(sst_memory.physical_address_ranges[0])[2:]) - -generator = generator_factory( - args.generator_class, - args.read_percentage, - int(str(AddrRange(0x0, memory_size))[2:]), -) - -# We use the Test Board. This is a special board to run traffic generation -# tasks. Can replace the cache_hierarchy with any hierarchy to simulate the -# cache as well as the memory -board = TestBoardForSST( - clk_freq="1GHz", # Ignored for these generators - generator=generator, # We pass the traffic generator as the processor. - # memory=sst_memory, - remote_memory_size=memory_size, - memory=None, - # With no cache hierarchy the test board will directly connect the - # generator to the memory - cache_hierarchy=None, -) -board._pre_instantiate() -root = Root(full_system=True, system=board) -# simulator = Simulator(board=board) -# simulator.run() From edbaf2a0fd23b636c2366c5489378821d5ca2fdf Mon Sep 17 00:00:00 2001 From: Kaustav Goswami Date: Tue, 21 Nov 2023 20:03:24 -0800 Subject: [PATCH 10/23] ext,configs: added stdlib examples to sst scripts This change create config script for both gem5 and SST and allows SST to run these scripts using one or more ISAs at the same time. 
Signed-off-by: Kaustav Goswami --- disaggregated_memory/boards/arm_sst_board.py | 1 + .../configs/arm-sst-numa-nodes.py | 53 +++- .../configs/riscv-sst-numa-nodes.py | 49 +++- ...rd_full_mem.py => example_arm_dm_board.py} | 78 ++---- ext/sst/sst/example_board.py | 144 ----------- ext/sst/sst/example_board_x86.py | 144 ----------- ext/sst/sst/example_freq.py | 139 ----------- ...des_w_board_arm.py => example_multiISA.py} | 91 +++---- ext/sst/sst/example_nodes.py | 221 ----------------- ext/sst/sst/example_nodes_multi_ISA.py | 234 ------------------ ...s_w_board.py => example_riscv_dm_board.py} | 81 ++---- ext/sst/sst/example_traffic_gen.py | 218 ---------------- 12 files changed, 169 insertions(+), 1284 deletions(-) rename ext/sst/sst/{example_nodes_w_board_full_mem.py => example_arm_dm_board.py} (70%) delete mode 100644 ext/sst/sst/example_board.py delete mode 100644 ext/sst/sst/example_board_x86.py delete mode 100644 ext/sst/sst/example_freq.py rename ext/sst/sst/{example_nodes_w_board_arm.py => example_multiISA.py} (73%) delete mode 100644 ext/sst/sst/example_nodes.py delete mode 100644 ext/sst/sst/example_nodes_multi_ISA.py rename ext/sst/sst/{example_nodes_w_board.py => example_riscv_dm_board.py} (69%) delete mode 100644 ext/sst/sst/example_traffic_gen.py diff --git a/disaggregated_memory/boards/arm_sst_board.py b/disaggregated_memory/boards/arm_sst_board.py index 5f30dc2d23..1e60803ba8 100644 --- a/disaggregated_memory/boards/arm_sst_board.py +++ b/disaggregated_memory/boards/arm_sst_board.py @@ -37,6 +37,7 @@ AddrRange, VoltageDomain, SrcClockDomain, + NoncoherentXBar, Terminal, VncServer, IOXBar, diff --git a/disaggregated_memory/configs/arm-sst-numa-nodes.py b/disaggregated_memory/configs/arm-sst-numa-nodes.py index b35498e15c..353a219c95 100644 --- a/disaggregated_memory/configs/arm-sst-numa-nodes.py +++ b/disaggregated_memory/configs/arm-sst-numa-nodes.py @@ -35,6 +35,7 @@ import os import sys +import argparse # all the source files are one directory 
above. sys.path.append( @@ -57,6 +58,49 @@ from gem5.resources.workload import * from gem5.resources.resource import * +# SST passes a couple of arguments for this system to simulate. +parser = argparse.ArgumentParser() +parser.add_argument("--command", type=str, help="Command run by guest") +parser.add_argument( + "--cpu-type", + type=str, + choices=["atomic", "timing", "o3"], + default="atomic", + help="CPU type", +) +parser.add_argument( + "--cpu-clock-rate", + type=str, + required=True, + help="CPU Clock", +) +parser.add_argument( + "--local-memory-size", + type=str, + required=True, + help="Local memory size", +) +parser.add_argument( + "--remote-memory-addr-range", + type=str, + required=True, + help="Remote memory range", +) +parser.add_argument( + "--remote-memory-latency", + type=int, + required=True, + help="Remote memory latency in Ticks (has to be converted prior)", +) +args = parser.parse_args() +cpu_type = { + "o3" : CPUTypes.O3, + "atomic": CPUTypes.ATOMIC, + "timing": CPUTypes.TIMING}[args.cpu_type] + +remote_memory_range = list(map(int, args.remote_memory_range.split(","))) +remote_memory_range = AddrRange(remote_memory_range[0], remote_memory_range[1]) + # This runs a check to ensure the gem5 binary is compiled for RISCV. requires(isa_required=ISA.ARM) # Here we setup the parameters of the l1 and l2 caches. @@ -64,14 +108,15 @@ l1d_size="32KiB", l1i_size="32KiB", l2_size="1MB" ) # Memory: Dual Channel DDR4 2400 DRAM device. -local_memory = DualChannelDDR4_2400(size="2GiB") +local_memory = DualChannelDDR4_2400(size=args.local_memory_range) # Either suppy the size of the remote memory or the address range of the # remote memory. Since this is inside the external memory, it does not matter # what type of memory is being simulated. This can either be initialized with # a size or a memory address range, which is mroe flexible. 
Adding remote # memory latency automatically adds a non-coherent crossbar to simulate latenyc remote_memory = ExternalRemoteMemoryInterface( - addr_range=AddrRange(0x100000000, size="2GiB"), remote_memory_latency=750 + addr_range=remote_memory_range, + remote_memory_latency=args.remote_memory_latency ) # Here we setup the processor. We use a simple processor. processor = SimpleProcessor( @@ -79,8 +124,8 @@ ) # Here we setup the board which allows us to do Full-System ARM simulations. board = ArmSstDMBoard( - clk_freq="3GHz", - processor=processor, + clk_freq=args.cpu_clock_rate, + processor=cpu_type, local_memory=local_memory, remote_memory=remote_memory, cache_hierarchy=cache_hierarchy, diff --git a/disaggregated_memory/configs/riscv-sst-numa-nodes.py b/disaggregated_memory/configs/riscv-sst-numa-nodes.py index 0e07e13237..2cf0ddb788 100644 --- a/disaggregated_memory/configs/riscv-sst-numa-nodes.py +++ b/disaggregated_memory/configs/riscv-sst-numa-nodes.py @@ -35,6 +35,7 @@ import os import sys +import argparse # all the source files are one directory above. sys.path.append( @@ -58,6 +59,49 @@ from gem5.resources.workload import * from gem5.resources.resource import * +# SST passes a couple of arguments for this system to simulate. 
+parser = argparse.ArgumentParser() +parser.add_argument("--command", type=str, help="Command run by guest") +parser.add_argument( + "--cpu-type", + type=str, + choices=["atomic", "timing", "o3"], + default="atomic", + help="CPU type", +) +parser.add_argument( + "--cpu-clock-rate", + type=str, + required=True, + help="CPU Clock", +) +parser.add_argument( + "--local-memory-size", + type=str, + required=True, + help="Local memory size", +) +parser.add_argument( + "--remote-memory-addr-range", + type=str, + required=True, + help="Remote memory range", +) +parser.add_argument( + "--remote-memory-latency", + type=int, + required=True, + help="Remote memory latency in Ticks (has to be converted prior)", +) +args = parser.parse_args() +cpu_type = { + "o3" : CPUTypes.O3, + "atomic": CPUTypes.ATOMIC, + "timing": CPUTypes.TIMING}[args.cpu_type] + +remote_memory_range = list(map(int, args.remote_memory_range.split(","))) +remote_memory_range = AddrRange(remote_memory_range[0], remote_memory_range[1]) + # This runs a check to ensure the gem5 binary is compiled for RISCV. requires(isa_required=ISA.RISCV) # Here we setup the parameters of the l1 and l2 caches. @@ -65,14 +109,15 @@ l1d_size="32KiB", l1i_size="32KiB", l2_size="1MB" ) # Memory: Dual Channel DDR4 2400 DRAM device. -local_memory = DualChannelDDR4_2400(size="2GiB") +local_memory = DualChannelDDR4_2400(size=args.local_memory_range) # Either suppy the size of the remote memory or the address range of the # remote memory. Since this is inside the external memory, it does not matter # what type of memory is being simulated. This can either be initialized with # a size or a memory address range, which is mroe flexible. 
Adding remote # memory latency automatically adds a non-coherent crossbar to simulate latenyc remote_memory = ExternalRemoteMemoryInterface( - addr_range=AddrRange(0x100000000, size="2GiB"), remote_memory_latency=750 + addr_range=remote_memory_range, + remote_memory_latency=args.remote_memory_latency ) # Here we setup the processor. We use a simple processor. processor = SimpleProcessor( diff --git a/ext/sst/sst/example_nodes_w_board_full_mem.py b/ext/sst/sst/example_arm_dm_board.py similarity index 70% rename from ext/sst/sst/example_nodes_w_board_full_mem.py rename to ext/sst/sst/example_arm_dm_board.py index 00e492f8c2..0da906066c 100644 --- a/ext/sst/sst/example_nodes_w_board_full_mem.py +++ b/ext/sst/sst/example_arm_dm_board.py @@ -31,41 +31,8 @@ from sst import UnitAlgebra -cache_link_latency = "1ns" - -bbl = "riscv-boot-exit-nodisk" -cpu_clock_rate = "3GHz" -# gem5 will send requests to physical addresses of range [0x80000000, inf) to memory -# currently, we do not subtract 0x80000000 from the request's address to get the "real" address -# so, the mem_size would always be 2GiB larger than the desired memory size -# memory_size_gem5 = "2GiB" -# memory_size_sst = "4GiB" -# addr_range_end = UnitAlgebra(memory_size_sst).getRoundedValue() - -l1_params = { - "access_latency_cycles" : "1", - "cache_frequency" : cpu_clock_rate, - "replacement_policy" : "lru", - "coherence_protocol" : "MESI", - "associativity" : "4", - "cache_line_size" : "64", - "cache_size" : "32 MiB", - "L1" : "1", -} - -dirNicParams = { - "network_bw" : "25GB/s", - "group" : 1, -} - -def create_cache(name, params = None): - cache = sst.Component(name, "memHierarchy.Cache") - if params is None: - cache.addParams(l1_params) - else: - cache.addParams(params) - return cache - +cache_link_latency = "1ps" +cpu_clock_rate = "4.2GHz" def connect_components(link_name: str, low_port_name: str, low_port_idx: int, high_port_name: str, high_port_idx: int, @@ -84,22 +51,22 @@ def connect_components(link_name: 
str, # =========================================================================== # -# Define the number of gem5 nodes in the system. +# Define the number of gem5 nodes in the system. anything more than 1 needs +# mpirun to run the sst binary. system_nodes = 2 # Define the total number of SST Memory nodes memory_nodes = 1 # This example uses fixed number of node size -> 2 GiB -# TODO: Fix this in the later version of the script. # The directory controller decides where the addresses are mapped to. node_memory_slice = "2GiB" remote_memory_slice = "2GiB" # SST memory node size. Each system gets a 2 GiB slice of fixed memory. -sst_memory_size = str((memory_nodes * int(node_memory_slice[0])) + (system_nodes) * 2 + 2) + "GiB" +sst_memory_size = str( + (memory_nodes * int(node_memory_slice[0])) + (system_nodes) * 2 + 2) +"GiB" addr_range_end = UnitAlgebra(sst_memory_size).getRoundedValue() -print(sst_memory_size) # There is one cache bus connecting all gem5 ports to the remote memory. mem_bus = sst.Component("membus", "memHierarchy.Bus") @@ -129,22 +96,21 @@ def connect_components(link_name: str, # Each of the nodes needs to have the initial parameters. We might need to # to supply the instance count to the Gem5 side. This will enable range # adjustments to be made to the DTB File. 
+ node_range = [0x80000000 + (node + 1) * 0x80000000, + 0x80000000 + (node + 2) * 0x80000000] + print(node_range) cmd = [ - f"--outdir=m5out_node_O3x_{node}", - "../../disaggregated_memory_setup/numa_config_sst_nodes.py", + f"--outdir=m5out_arm_node_{node}", + "../../disaggregated_memory/configs/arm-sst-numa-nodes.py", f"--cpu-clock-rate {cpu_clock_rate}", - f"--instance {node}" - - # "--outdir=m5out_{}".format(node), - # "../../configs/example/sst/riscv_fs_node.py", - # "--cpu-clock-rate {}".format(cpu_clock_rate), - # "--memory-size {}".format(node_memory_slice), - # # "--local-memory-size {}".format(node_memory_slice), - # # "--remote-memory-size {}".format(remote_memory_slice), - # "--instance {}".format(node) + "--cpu-type o3", + f"--local-memory-size {node_memory_slice}", + f"--remote-memory-addr-range {node_range[0]},{node_range[1]}", + f"--remote-memory-latency \ + {int(float(cpu_clock_rate[0:cpu_clock_rate.find('G')]) * 250)}" ] ports = { - "remote_memory_port" : "system.remote_memory" + "remote_memory_port" : "board.remote_memory" } port_list = [] for port in ports: @@ -180,14 +146,6 @@ def connect_components(link_name: str, mem_bus, node, port = True) - # directory_caches.append(create_cache("dir_cache_{}".format(node))) - # directory_caches[node].addParams({"network_address" : "2" }) - # Connect the basic components. - # connect_components("node_{}_mem_port_2_bus".format(node), - # memory_ports[node], 0, - # cache_buses[node], node, - # port = True) - # All system nodes are setup. Now create a SST memory. Keep it simplemem for # avoiding extra simulation time. There is only one memory node in SST's side. 
# This will be updated in the future to use number of sst_memory_nodes @@ -200,5 +158,5 @@ def connect_components(link_name: str, # enable Statistics stat_params = { "rate" : "0ns" } sst.setStatisticLoadLevel(10) -sst.setStatisticOutput("sst.statOutputTXT", {"filepath" : "./sst-boards-example.txt"}) +sst.setStatisticOutput("sst.statOutputTXT", {"filepath" : "./arm-board.txt"}) sst.enableAllStatisticsForAllComponents() diff --git a/ext/sst/sst/example_board.py b/ext/sst/sst/example_board.py deleted file mode 100644 index 8439f39ae2..0000000000 --- a/ext/sst/sst/example_board.py +++ /dev/null @@ -1,144 +0,0 @@ -# Copyright (c) 2023 The Regents of the University of California -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer; -# redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution; -# neither the name of the copyright holders nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import sst -import sys -import os - -from sst import UnitAlgebra - -cache_link_latency = "1ps" - -bbl = "riscv-boot-exit-nodisk" -cpu_clock_rate = "3GHz" -# gem5 will send requests to physical addresses of range [0x80000000, inf) to -# memory currently, we do not subtract 0x80000000 from the request's address to -# get the "real" address so, the mem_size would always be 2GiB larger than the -# desired memory size - -# memory_size_gem5 = "2GiB" -memory_size_sst = "6GiB" -addr_range_end = UnitAlgebra(memory_size_sst).getRoundedValue() - -l1_params = { - "access_latency_cycles" : "1", - "cache_frequency" : cpu_clock_rate, - "replacement_policy" : "lru", - "coherence_protocol" : "MESI", - "associativity" : "4", - "cache_line_size" : "64", - "cache_size" : "32MiB", - "L1" : "1", -} - -# We keep a track of all the memory ports that we have. -sst_ports = { - # "system_port" : "system.system_outgoing_bridge", - # "cache_port" : "system.memory_outgoing_bridge" - "remote_memory_port" : "system.remote_memory" -} - -# We need a list of ports. 
-port_list = [] -for port in sst_ports: - port_list.append(port) - -cpu_params = { - "frequency": cpu_clock_rate, - "cmd": " ../../disaggregated_memory_setup/numa_config_sst.py", - "debug_flags": "VIO", - "ports" : " ".join(port_list) -} - -gem5_node = sst.Component("gem5_node", "gem5.gem5Component") -gem5_node.addParams(cpu_params) - -# cache_bus = sst.Component("cache_bus", "memHierarchy.Bus") -# cache_bus.addParams( { "bus_frequency" : cpu_clock_rate } ) - -# for initialization -# system_port = gem5_node.setSubComponent(port_list[0], "gem5.gem5Bridge", 0) -# tell the SubComponent the name of the corresponding SimObject -# system_port.addParams({ "response_receiver_name": sst_ports["system_port"]}) - -# SST -> gem5 -# cache_port = gem5_node.setSubComponent(port_list[1], "gem5.gem5Bridge", 0) -# cache_port.addParams({ "response_receiver_name": sst_ports["cache_port"]}) - -# L1 cache -# l1_cache = sst.Component("l1_cache", "memHierarchy.Cache") -# l1_cache.addParams(l1_params) - -remote_memory_port = gem5_node.setSubComponent("remote_memory_port", - "gem5.gem5Bridge", 0) -remote_memory_port.addParams({ - "response_receiver_name" : sst_ports["remote_memory_port"] -}) - -# Memory -memctrl = sst.Component("memory", "memHierarchy.MemController") -# `addr_range_end` should be changed accordingly to memory_size_sst -memctrl.addParams({ - "debug" : "0", - "clock" : "2.4GHz", - "request_width" : "64", - "addr_range_end" : addr_range_end, -}) -memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") -memory.addParams({ - "access_time" : "30ns", - "mem_size" : memory_size_sst -}) - -# Connections -# cpu <-> L1 -# cpu_cache_link = sst.Link("cpu_l1_cache_link") -# cpu_cache_link.connect( -# (cache_port, "port", cache_link_latency), -# (cache_bus, "high_network_0", cache_link_latency) -# ) -# system_cache_link = sst.Link("system_cache_link") -# system_cache_link.connect( -# (system_port, "port", cache_link_latency), -# (cache_bus, "high_network_1", 
cache_link_latency) -# ) -# cache_bus_cache_link = sst.Link("cache_bus_cache_link") -# cache_bus_cache_link.connect( -# (cache_bus, "low_network_0", cache_link_latency), -# (l1_cache, "high_network_0", cache_link_latency) -# ) -# L1 <-> mem -cache_mem_link = sst.Link("l1_cache_mem_link") -cache_mem_link.connect( - (remote_memory_port, "port", cache_link_latency), - # (l1_cache, "low_network_0", cache_link_latency), - (memctrl, "direct_link", cache_link_latency) -) - -# enable Statistics -stat_params = { "rate" : "0ns" } -sst.setStatisticLoadLevel(5) -sst.setStatisticOutput("sst.statOutputTXT", {"filepath" : "./sst-stats.txt"}) diff --git a/ext/sst/sst/example_board_x86.py b/ext/sst/sst/example_board_x86.py deleted file mode 100644 index 5d7d3464a4..0000000000 --- a/ext/sst/sst/example_board_x86.py +++ /dev/null @@ -1,144 +0,0 @@ -# Copyright (c) 2023 The Regents of the University of California -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer; -# redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution; -# neither the name of the copyright holders nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import sst -import sys -import os - -from sst import UnitAlgebra - -cache_link_latency = "1ps" - -bbl = "riscv-boot-exit-nodisk" -cpu_clock_rate = "3GHz" -# gem5 will send requests to physical addresses of range [0x80000000, inf) to -# memory currently, we do not subtract 0x80000000 from the request's address to -# get the "real" address so, the mem_size would always be 2GiB larger than the -# desired memory size - -# memory_size_gem5 = "2GiB" -memory_size_sst = "6GiB" -addr_range_end = UnitAlgebra(memory_size_sst).getRoundedValue() - -l1_params = { - "access_latency_cycles" : "1", - "cache_frequency" : cpu_clock_rate, - "replacement_policy" : "lru", - "coherence_protocol" : "MESI", - "associativity" : "4", - "cache_line_size" : "64", - "cache_size" : "32MiB", - "L1" : "1", -} - -# We keep a track of all the memory ports that we have. -sst_ports = { - # "system_port" : "system.system_outgoing_bridge", - # "cache_port" : "system.memory_outgoing_bridge" - "remote_memory_port" : "system.remote_memory" -} - -# We need a list of ports. 
-port_list = [] -for port in sst_ports: - port_list.append(port) - -cpu_params = { - "frequency": cpu_clock_rate, - "cmd": " ../../disaggregated_memory_setup/numa_config_x86.py", - "debug_flags": "", - "ports" : " ".join(port_list) -} - -gem5_node = sst.Component("gem5_node", "gem5.gem5Component") -gem5_node.addParams(cpu_params) - -# cache_bus = sst.Component("cache_bus", "memHierarchy.Bus") -# cache_bus.addParams( { "bus_frequency" : cpu_clock_rate } ) - -# for initialization -# system_port = gem5_node.setSubComponent(port_list[0], "gem5.gem5Bridge", 0) -# tell the SubComponent the name of the corresponding SimObject -# system_port.addParams({ "response_receiver_name": sst_ports["system_port"]}) - -# SST -> gem5 -# cache_port = gem5_node.setSubComponent(port_list[1], "gem5.gem5Bridge", 0) -# cache_port.addParams({ "response_receiver_name": sst_ports["cache_port"]}) - -# L1 cache -# l1_cache = sst.Component("l1_cache", "memHierarchy.Cache") -# l1_cache.addParams(l1_params) - -remote_memory_port = gem5_node.setSubComponent("remote_memory_port", - "gem5.gem5Bridge", 0) -remote_memory_port.addParams({ - "response_receiver_name" : sst_ports["remote_memory_port"] -}) - -# Memory -memctrl = sst.Component("memory", "memHierarchy.MemController") -# `addr_range_end` should be changed accordingly to memory_size_sst -memctrl.addParams({ - "debug" : "0", - "clock" : "2.4GHz", - "request_width" : "64", - "addr_range_end" : addr_range_end, -}) -memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") -memory.addParams({ - "access_time" : "30ns", - "mem_size" : memory_size_sst -}) - -# Connections -# cpu <-> L1 -# cpu_cache_link = sst.Link("cpu_l1_cache_link") -# cpu_cache_link.connect( -# (cache_port, "port", cache_link_latency), -# (cache_bus, "high_network_0", cache_link_latency) -# ) -# system_cache_link = sst.Link("system_cache_link") -# system_cache_link.connect( -# (system_port, "port", cache_link_latency), -# (cache_bus, "high_network_1", cache_link_latency) 
-# ) -# cache_bus_cache_link = sst.Link("cache_bus_cache_link") -# cache_bus_cache_link.connect( -# (cache_bus, "low_network_0", cache_link_latency), -# (l1_cache, "high_network_0", cache_link_latency) -# ) -# L1 <-> mem -cache_mem_link = sst.Link("l1_cache_mem_link") -cache_mem_link.connect( - (remote_memory_port, "port", cache_link_latency), - # (l1_cache, "low_network_0", cache_link_latency), - (memctrl, "direct_link", cache_link_latency) -) - -# enable Statistics -stat_params = { "rate" : "0ns" } -sst.setStatisticLoadLevel(5) -sst.setStatisticOutput("sst.statOutputTXT", {"filepath" : "./sst-stats.txt"}) diff --git a/ext/sst/sst/example_freq.py b/ext/sst/sst/example_freq.py deleted file mode 100644 index 7886e196b4..0000000000 --- a/ext/sst/sst/example_freq.py +++ /dev/null @@ -1,139 +0,0 @@ -# Copyright (c) 2023 The Regents of the University of California -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer; -# redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution; -# neither the name of the copyright holders nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import sst -import sys -import os - -from sst import UnitAlgebra - -cache_link_latency = "0ps" - -bbl = "riscv-boot-exit-nodisk" -cpu_clock_rate = "1GHz" -# gem5 will send requests to physical addresses of range [0x80000000, inf) to -# memory currently, we do not subtract 0x80000000 from the request's address to -# get the "real" address so, the mem_size would always be 2GiB larger than the -# desired memory size -memory_size_gem5 = "2GiB" -memory_size_sst = "4GiB" -addr_range_end = UnitAlgebra(memory_size_sst).getRoundedValue() - -l1_params = { - "access_latency_cycles" : "1", - "cache_frequency" : cpu_clock_rate, - "replacement_policy" : "lru", - "coherence_protocol" : "MESI", - "associativity" : "4", - "cache_line_size" : "64", - "cache_size" : "2 MiB", - "L1" : "1", -} - -# We keep a track of all the memory ports that we have. -sst_ports = { - # "system_port" : "system.system_outgoing_bridge", - "cache_port" : "system.memory_outgoing_bridge" -} - -# We need a list of ports. 
-port_list = [] -for port in sst_ports: - port_list.append(port) - -cpu_params = { - "frequency": cpu_clock_rate, - "cmd": " --outdir=10MHz ../../configs/example/sst/riscv_fs.py" - + f" --cpu-clock-rate {cpu_clock_rate}" - + f" --memory-size {memory_size_gem5}", - "debug_flags": "VIO", - "ports" : " ".join(port_list) -} - -gem5_node = sst.Component("gem5_node", "gem5.gem5Component") -gem5_node.addParams(cpu_params) - -cache_bus = sst.Component("cache_bus", "memHierarchy.Bus") -cache_bus.addParams( { "bus_frequency" : cpu_clock_rate } ) - -# for initialization -# system_port = gem5_node.setSubComponent(port_list[0], "gem5.gem5Bridge", 0) -# tell the SubComponent the name of the corresponding SimObject -# system_port.addParams({ "response_receiver_name": sst_ports["system_port"]}) - -# SST -> gem5 -cache_port = gem5_node.setSubComponent(port_list[0], "gem5.gem5Bridge", 0) -cache_port.addParams({ "response_receiver_name": sst_ports["cache_port"]}) - -# L1 cache -# l1_cache = sst.Component("l1_cache", "memHierarchy.Cache") -# l1_cache.addParams(l1_params) - -# Memory -memctrl = sst.Component("memory", "memHierarchy.MemController") -# `addr_range_end` should be changed accordingly to memory_size_sst -memctrl.addParams({ - "debug" : "0", - "clock" : "1GHz", - "request_width" : "64", - "addr_range_end" : addr_range_end, -}) -memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") -memory.addParams({ - "access_time" : "48.75ns", - "mem_size" : memory_size_sst -}) - -# Connections -# cpu <-> L1 -cpu_cache_link = sst.Link("cpu_l1_cache_link") -cpu_cache_link.connect( - (cache_port, "port", cache_link_latency), - (cache_bus, "high_network_0", cache_link_latency) -) -# system_cache_link = sst.Link("system_cache_link") -# system_cache_link.connect( -# (system_port, "port", cache_link_latency), -# (cache_bus, "high_network_1", cache_link_latency) -# ) -# cache_bus_cache_link = sst.Link("cache_bus_cache_link") -# cache_bus_cache_link.connect( -# (cache_bus, 
"low_network_0", cache_link_latency), -# (l1_cache, "high_network_0", cache_link_latency) -# ) -# L1 <-> mem -cache_mem_link = sst.Link("l1_cache_mem_link") -cache_mem_link.connect( - (cache_bus, "low_network_0", cache_link_latency), - (memctrl, "direct_link", cache_link_latency) -) - -# enable Statistics -stat_params = { "rate" : "0ns" } -sst.setStatisticLoadLevel(5) -sst.setStatisticOutput("sst.statOutputTXT", {"filepath" : "./sst-stats-10Mhz.txt"}) -# sst.enableAllStatisticsForComponentName("l1_cache", stat_params) -sst.enableAllStatisticsForComponentName("memory", stat_params) diff --git a/ext/sst/sst/example_nodes_w_board_arm.py b/ext/sst/sst/example_multiISA.py similarity index 73% rename from ext/sst/sst/example_nodes_w_board_arm.py rename to ext/sst/sst/example_multiISA.py index 0096a190d1..5cf5e5030f 100644 --- a/ext/sst/sst/example_nodes_w_board_arm.py +++ b/ext/sst/sst/example_multiISA.py @@ -31,40 +31,9 @@ from sst import UnitAlgebra -cache_link_latency = "1ns" +cache_link_latency = "1ps" -bbl = "riscv-boot-exit-nodisk" cpu_clock_rate = "3GHz" -# gem5 will send requests to physical addresses of range [0x80000000, inf) to memory -# currently, we do not subtract 0x80000000 from the request's address to get the "real" address -# so, the mem_size would always be 2GiB larger than the desired memory size -# memory_size_gem5 = "2GiB" -# memory_size_sst = "4GiB" -# addr_range_end = UnitAlgebra(memory_size_sst).getRoundedValue() - -l1_params = { - "access_latency_cycles" : "1", - "cache_frequency" : cpu_clock_rate, - "replacement_policy" : "lru", - "coherence_protocol" : "MESI", - "associativity" : "4", - "cache_line_size" : "64", - "cache_size" : "32 MiB", - "L1" : "1", -} - -dirNicParams = { - "network_bw" : "25GB/s", - "group" : 1, -} - -def create_cache(name, params = None): - cache = sst.Component(name, "memHierarchy.Cache") - if params is None: - cache.addParams(l1_params) - else: - cache.addParams(params) - return cache def connect_components(link_name: 
str, low_port_name: str, low_port_idx: int, @@ -97,9 +66,10 @@ def connect_components(link_name: str, remote_memory_slice = "2GiB" # SST memory node size. Each system gets a 2 GiB slice of fixed memory. -sst_memory_size = str((memory_nodes * int(node_memory_slice[0])) + (system_nodes) * 2 + 2) + "GiB" +# SST memory node size. Each system gets a 2 GiB slice of fixed memory. +sst_memory_size = str( + (memory_nodes * int(node_memory_slice[0])) + (system_nodes) * 2 + 2) +"GiB" addr_range_end = UnitAlgebra(sst_memory_size).getRoundedValue() -print(sst_memory_size) # There is one cache bus connecting all gem5 ports to the remote memory. mem_bus = sst.Component("membus", "memHierarchy.Bus") @@ -116,7 +86,7 @@ def connect_components(link_name: str, }) memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") memory.addParams({ - "access_time" : "50ns", + "access_time" : "30ns", "mem_size" : sst_memory_size }) @@ -129,19 +99,36 @@ def connect_components(link_name: str, # Each of the nodes needs to have the initial parameters. We might need to # to supply the instance count to the Gem5 side. This will enable range # adjustments to be made to the DTB File. - node_range = [0x80000000 + (node + 1) * 0x80000000, 0x80000000 + (node + 2) * 0x80000000] - print(node_range) - cmd = [ - f"--outdir=m5out_arm_node_{node}", - "../../disaggregated_memory_setup/numa_config_sst_nodes_arm.py", + cmd = [] + ports = {} + script = [] + # Each of the nodes needs to have the initial parameters. We might need to + # to supply the instance count to the Gem5 side. This will enable range + # adjustments to be made to the DTB File. + node_range = [0x80000000 + (node + 1) * 0x80000000, + 0x80000000 + (node + 2) * 0x80000000] + if node % 2 == 0: + # This is a RISCV node. We need to call the RISCV script. 
+ script = [ + f"--outdir=m5out_riscv_node_{node}", + "../../disaggregated_memory/configs/riscv-sst-numa-nodes.py", + ] + else: + script = [ + f"--outdir=m5out_arm_node_{node}", + "../../disaggregated_memory/configs/arm-sst-numa-nodes.py", + ] + + cmd = script + [ f"--cpu-clock-rate {cpu_clock_rate}", - f"--cpu-type o3", - f"--local-memory-range 2GiB", - f"--remote-memory-range {node_range[0]},{node_range[1]}", - # f"--instance {node}" + "--cpu-type timing", + f"--local-memory-size {node_memory_slice}", + f"--remote-memory-addr-range {node_range[0]},{node_range[1]}", + f"--remote-memory-latency \ + {int(float(cpu_clock_rate[0:cpu_clock_rate.find('G')]) * 250)}" ] ports = { - "remote_memory_port" : "system.remote_memory" + "remote_memory_port" : "board.remote_memory" } port_list = [] for port in ports: @@ -157,14 +144,15 @@ def connect_components(link_name: str, gem5_nodes.append( sst.Component("gem5_node_{}".format(node), "gem5.gem5Component") ) + gem5_nodes[node].addParams(cpu_params) gem5_nodes[node].setRank(node + 1, 0) - memory_ports.append( gem5_nodes[node].setSubComponent( "remote_memory_port", "gem5.gem5Bridge", 0 ) ) + memory_ports[node].addParams({ "response_receiver_name" : ports["remote_memory_port"] }) @@ -177,14 +165,6 @@ def connect_components(link_name: str, mem_bus, node, port = True) - # directory_caches.append(create_cache("dir_cache_{}".format(node))) - # directory_caches[node].addParams({"network_address" : "2" }) - # Connect the basic components. - # connect_components("node_{}_mem_port_2_bus".format(node), - # memory_ports[node], 0, - # cache_buses[node], node, - # port = True) - # All system nodes are setup. Now create a SST memory. Keep it simplemem for # avoiding extra simulation time. There is only one memory node in SST's side. 
# This will be updated in the future to use number of sst_memory_nodes @@ -197,5 +177,6 @@ def connect_components(link_name: str, # enable Statistics stat_params = { "rate" : "0ns" } sst.setStatisticLoadLevel(10) -sst.setStatisticOutput("sst.statOutputTXT", {"filepath" : "./sst-boards-arm-example-O3.txt"}) +sst.setStatisticOutput("sst.statOutputTXT", + {"filepath" : "./multiISA-board.txt"}) sst.enableAllStatisticsForAllComponents() diff --git a/ext/sst/sst/example_nodes.py b/ext/sst/sst/example_nodes.py deleted file mode 100644 index 2f14c4db93..0000000000 --- a/ext/sst/sst/example_nodes.py +++ /dev/null @@ -1,221 +0,0 @@ -# Copyright (c) 2023 The Regents of the University of California -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer; -# redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution; -# neither the name of the copyright holders nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -# This SST configuration file tests a merlin router. -import sst -import sys -import os - -from sst import UnitAlgebra - -cache_link_latency = "1ns" - -bbl = "riscv-boot-exit-nodisk" -cpu_clock_rate = "3GHz" -# gem5 will send requests to physical addresses of range [0x80000000, inf) to memory -# currently, we do not subtract 0x80000000 from the request's address to get the "real" address -# so, the mem_size would always be 2GiB larger than the desired memory size -# memory_size_gem5 = "2GiB" -# memory_size_sst = "4GiB" -# addr_range_end = UnitAlgebra(memory_size_sst).getRoundedValue() - -l1_params = { - "access_latency_cycles" : "1", - "cache_frequency" : cpu_clock_rate, - "replacement_policy" : "lru", - "coherence_protocol" : "MESI", - "associativity" : "4", - "cache_line_size" : "64", - "cache_size" : "32 MiB", - "L1" : "1", -} - -dirNicParams = { - "network_bw" : "25GB/s", - "group" : 1, -} - -def create_cache(name, params = None): - cache = sst.Component(name, "memHierarchy.Cache") - if params is None: - cache.addParams(l1_params) - else: - cache.addParams(params) - return cache - -def connect_components(link_name: str, - low_port_name: str, low_port_idx: int, - high_port_name: str, high_port_idx: int, - port = False, direct_link = False): - link = sst.Link(link_name) - low_port = "low_network_" + str(low_port_idx) - if port == True: - low_port = "port" - high_port = "high_network_" + str(high_port_idx) - if direct_link == True: - 
high_port = "direct_link" - link.connect( - (low_port_name, low_port, cache_link_latency), - (high_port_name, high_port, cache_link_latency) - ) - -# DEfine the number of gem5 nodes in the system. -system_nodes = 2 - -# Define the total number of SST Memory nodes -memory_nodes = 1 - -# This example uses fixed number of node size -> 2 GiB -# TODO: Fix this in the later version of the script. -# The directory controller decides where the addresses are mapped to. -node_memory_size = "2GiB" -remote_memory_slice = "2GiB" - -# SST memory node size. Each system gets a 2 GiB slice of fixed memory. -sst_memory_size = str((memory_nodes * int(node_memory_size[0])) + 2) + "GiB" -print(sst_memory_size) - -# Add all the Gem5 nodes to this list. -gem5_nodes = [] -cache_buses = [] -directory_caches = [] -comp_dirctrls = [] -memory_ports = [] - -# Create each of these nodes and conect it to a SST memory cache -for node in range(system_nodes): - # Each of the nodes needs to have the initial parameters. We might need to - # to supply the instance count to the Gem5 side. This will enable range - # adjustments to be made to the DTB File. - cmd = [ - "--outdir=m5out_{}".format(node), - "../../configs/example/sst/riscv_fs_node.py", - "--cpu-clock-rate {}".format(cpu_clock_rate), - "--memory-size {}".format(node_memory_size), - # "--local-memory-size {}".format(node_memory_size), - # "--remote-memory-size {}".format(remote_memory_slice), - "--instance {}".format(node) - ] - ports = { - "remote_memory_port" : "system.memory_outgoing_bridge" - } - port_list = [] - for port in ports: - port_list.append(port) - cpu_params = { - "frequency" : cpu_clock_rate, - "cmd" : " ".join(cmd), - "debug_flags" : "Plic,Clint", - "ports" : " ".join(port_list) - } - # Each of the Gem5 node has to be separately simulated. TODO: Figure out - # this part on the mpirun side. 
- gem5_nodes.append( - sst.Component("gem5_node_{}".format(node), "gem5.gem5Component") - ) - gem5_nodes[node].addParams(cpu_params) - # We need a separate cache bus for each of the nodes - cache_buses.append( - sst.Component("cache_bus_for_node_{}".format(node), "memHierarchy.Bus") - ) - cache_buses[node].addParams({"bus_frequency" : cpu_clock_rate}) - # TODO: This needs to be updated - memory_ports.append( - gem5_nodes[node].setSubComponent( - "remote_memory_port", "gem5.gem5Bridge", 0 - ) - ) - memory_ports[node].addParams({ - "response_receiver_name" : "system.memory_outgoing_bridge" - }) - directory_caches.append(create_cache("dir_cache_{}".format(node))) - directory_caches[node].addParams({"network_address" : "2" }) - # Connect the basic components. - connect_components("node_{}_mem_port_2_bus".format(node), - memory_ports[node], 0, - cache_buses[node], 0, - port = True) - connect_components("node_{}_bus_2_dir_cache".format(node), - cache_buses[node], 0, - directory_caches[node], 0) - # Create directory controllers that dictates the memory ranges for each of - # the remote meory nodes. - comp_dirctrls.append(sst.Component( - "dirctrl_for_node_{}".format(node), - "memHierarchy.DirectoryController") - ) - addr_range_start = 0x80000000 + node * 0x80000000 - addr_range_end = 0x80000000 + (node + 1) * 0x80000000 - comp_dirctrls[node].addParams({ - "coherence_protocol" : "MESI", - "network_address" : "1", - "entry_cache_size" : "16384", - "network_bw" : "25GB/s", - "addr_range_start" : addr_range_start, # 2 * (1024 ** 3), # starts at 0x80000000 - "addr_range_end" : addr_range_end # 2 * (1024 ** 3) + 2048 * (1024 ** 2) # ends at 0x100000000 (4GiB) - }) -# All system nodes are setup. Now create a SST memory. Keep it simplemem for -# avoiding extra simulation time. There is only one memory node in SST's side. 
-# This will be updated in the future to use number of sst_memory_nodes -memory = sst.Component("memory", "memHierarchy.MemController") -memory.addParams({ - "request_width" : 64, - "coherence_protocol" : "MESI", - "access_time" : "33 ns", - "backend.mem_size" : sst_memory_size, - "clock" : "2.4GHz", - "debug" : "0", - "range_start" : 2 * (1024 ** 3), # it's behind a directory controller and it starts at 0x80000000 - }) -comp_chiprtr = sst.Component("chiprtr", "merlin.hr_router") -comp_chiprtr.setSubComponent("topology","merlin.singlerouter") -comp_chiprtr.addParams({ - "xbar_bw" : "128GB/s", - "link_bw" : "128GB/s", - "input_buf_size" : "1KB", - "num_ports" : str(system_nodes * 2), - "flit_size" : "72B", - "output_buf_size" : "1KB", - "id" : "0", - "topology" : "merlin.singlerouter" -}) -mem_bus = sst.Component("membus", "memHierarchy.Bus") -# Finally connect all the nodes together in the net -for node in range(system_nodes): - sst.Link("link_cache_net_node_{}".format(node)).connect( - (directory_caches[node], "directory", "10ns"), - (comp_chiprtr, "port" + str(node * 2 + 1), "2ns")) - sst.Link("link_dir_net_nodes_{}".format(node)).connect( - (comp_chiprtr, "port" + str(node * 2), "2ns"), - (comp_dirctrls[node], "network", "2ns")) - sst.Link("link_dir_mem_link_node_{}".format(node)).connect( - (comp_dirctrls[node], "memory", "10ns"), - (memory, "direct_link", "10ns")) -# enable Statistics -stat_params = { "rate" : "0ns" } -sst.setStatisticLoadLevel(10) -sst.setStatisticOutput("sst.statOutputTXT", {"filepath" : "./sst-router-example.txt"}) -sst.enableAllStatisticsForAllComponents() diff --git a/ext/sst/sst/example_nodes_multi_ISA.py b/ext/sst/sst/example_nodes_multi_ISA.py deleted file mode 100644 index 46e04a9751..0000000000 --- a/ext/sst/sst/example_nodes_multi_ISA.py +++ /dev/null @@ -1,234 +0,0 @@ -# Copyright (c) 2023 The Regents of the University of California -# All rights reserved. 
-# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer; -# redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution; -# neither the name of the copyright holders nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -# This SST configuration file tests a merlin router. 
-import sst -import sys -import os - -from sst import UnitAlgebra - -cache_link_latency = "1ns" - -bbl = "riscv-boot-exit-nodisk" -cpu_clock_rate = "3GHz" -# gem5 will send requests to physical addresses of range [0x80000000, inf) to memory -# currently, we do not subtract 0x80000000 from the request's address to get the "real" address -# so, the mem_size would always be 2GiB larger than the desired memory size -# memory_size_gem5 = "2GiB" -# memory_size_sst = "4GiB" -# addr_range_end = UnitAlgebra(memory_size_sst).getRoundedValue() - -l1_params = { - "access_latency_cycles" : "1", - "cache_frequency" : cpu_clock_rate, - "replacement_policy" : "lru", - "coherence_protocol" : "MESI", - "associativity" : "4", - "cache_line_size" : "64", - "cache_size" : "32 MiB", - "L1" : "1", -} - -dirNicParams = { - "network_bw" : "25GB/s", - "group" : 1, -} - -def create_cache(name, params = None): - cache = sst.Component(name, "memHierarchy.Cache") - if params is None: - cache.addParams(l1_params) - else: - cache.addParams(params) - return cache - -def connect_components(link_name: str, - low_port_name: str, low_port_idx: int, - high_port_name: str, high_port_idx: int, - port = False, direct_link = False): - link = sst.Link(link_name) - low_port = "low_network_" + str(low_port_idx) - if port == True: - low_port = "port" - high_port = "high_network_" + str(high_port_idx) - if direct_link == True: - high_port = "direct_link" - link.connect( - (low_port_name, low_port, cache_link_latency), - (high_port_name, high_port, cache_link_latency) - ) - -# =========================================================================== # - -# Define the number of gem5 nodes in the system. -system_nodes = 2 - -# Define the total number of SST Memory nodes -memory_nodes = 1 - -# This example uses fixed number of node size -> 2 GiB -# TODO: Fix this in the later version of the script. -# The directory controller decides where the addresses are mapped to. 
-node_memory_slice = "2GiB" -remote_memory_slice = "2GiB" - -# SST memory node size. Each system gets a 2 GiB slice of fixed memory. -sst_memory_size = str((memory_nodes * int(node_memory_slice[0])) + (system_nodes) * 2 + 2) + "GiB" -addr_range_end = UnitAlgebra(sst_memory_size).getRoundedValue() -print(sst_memory_size) - -# There is one cache bus connecting all gem5 ports to the remote memory. -mem_bus = sst.Component("membus", "memHierarchy.Bus") -mem_bus.addParams( { "bus_frequency" : cpu_clock_rate } ) - -memctrl = sst.Component("memory", "memHierarchy.MemController") -memctrl.setRank(0, 0) -# `addr_range_end` should be changed accordingly to memory_size_sst -memctrl.addParams({ - "debug" : "0", - "clock" : "2.4GHz", - "request_width" : "64", - "addr_range_end" : addr_range_end, -}) -memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") -memory.addParams({ - "access_time" : "30ns", - "mem_size" : sst_memory_size -}) - -# Add all the Gem5 nodes to this list. -gem5_nodes = [] -memory_ports = [] - -# Create each of these nodes and conect it to a SST memory cache -for node in range(system_nodes): - # Each of the nodes needs to have the initial parameters. We might need to - # to supply the instance count to the Gem5 side. This will enable range - # adjustments to be made to the DTB File. 
- cmd = [] - ports = {} - if node % 2 == 1: - # This is a RISCV node - cmd = [ - f"--outdir=m5out_riscv_node_{node}", - "../../disaggregated_memory_setup/numa_config_sst_nodes.py", - f"--cpu-clock-rate {cpu_clock_rate}", - f"--instance {node}" - - # "--outdir=m5out_{}".format(node), - # "../../configs/example/sst/riscv_fs_node.py", - # "--cpu-clock-rate {}".format(cpu_clock_rate), - # "--memory-size {}".format(node_memory_slice), - # # "--local-memory-size {}".format(node_memory_slice), - # # "--remote-memory-size {}".format(remote_memory_slice), - # "--instance {}".format(node) - ] - ports = { - "remote_memory_port" : "system.remote_memory" - } - # Each of the Gem5 node has to be separately simulated. TODO: Figure out - # this part on the mpirun side. - gem5_nodes.append( - sst.Component("gem5_node_{}".format(node), "gem5.gem5Component") - ) - memory_ports.append( - gem5_nodes[node].setSubComponent( - "remote_memory_port", "gem5.gem5Bridge", 0 - ) - ) - else: - cmd = [ - f"--outdir=m5out_arm_node_{node}", - "../../disaggregated_memory_setup/numa_config_sst_nodes_arm.py", - f"--cpu-clock-rate {cpu_clock_rate}", - f"--cpu-type timing", - f"--local-memory-range 2GiB", - f"--remote-memory-range 4294967296,6442450944", - # f"--instance {node}" - ] - ports = { - "remote_memory_port" : "system.remote_memory_outgoing_bridge" - } - # Each of the Gem5 node has to be separately simulated. TODO: Figure out - # this part on the mpirun side. 
- gem5_nodes.append( - sst.Component("gem5_node_{}".format(node), "gem5.gem5Component") - ) - memory_ports.append( - gem5_nodes[node].setSubComponent( - "remote_memory_port", "gem5.gem5Bridge", 0 - ) - ) - # This is an ARM core - - port_list = [] - for port in ports: - port_list.append(port) - cpu_params = { - "frequency" : cpu_clock_rate, - "cmd" : " ".join(cmd), - "debug_flags" : "Plic,Clint,VIO", - "ports" : " ".join(port_list) - } - - gem5_nodes[node].addParams(cpu_params) - gem5_nodes[node].setRank(node + 1, 0) - - memory_ports[node].addParams({ - "response_receiver_name" : ports["remote_memory_port"] - }) - - # we dont need directory controllers in this example case. The start and - # end ranges does not really matter as the OS is doing this management in - # in this case. - connect_components(f"node_{node}_mem_port_2_mem_bus", - memory_ports[node], 0, - mem_bus, node, - port = True) - - # directory_caches.append(create_cache("dir_cache_{}".format(node))) - # directory_caches[node].addParams({"network_address" : "2" }) - # Connect the basic components. - # connect_components("node_{}_mem_port_2_bus".format(node), - # memory_ports[node], 0, - # cache_buses[node], node, - # port = True) - -# All system nodes are setup. Now create a SST memory. Keep it simplemem for -# avoiding extra simulation time. There is only one memory node in SST's side. 
-# This will be updated in the future to use number of sst_memory_nodes - -connect_components("membus_2_memory", - mem_bus, 0, - memctrl, 0, - direct_link = True) - -# enable Statistics -stat_params = { "rate" : "0ns" } -sst.setStatisticLoadLevel(10) -sst.setStatisticOutput("sst.statOutputTXT", {"filepath" : "./sst-boards-example.txt"}) -sst.enableAllStatisticsForAllComponents() diff --git a/ext/sst/sst/example_nodes_w_board.py b/ext/sst/sst/example_riscv_dm_board.py similarity index 69% rename from ext/sst/sst/example_nodes_w_board.py rename to ext/sst/sst/example_riscv_dm_board.py index 40eaae9656..9a6c07b6e6 100644 --- a/ext/sst/sst/example_nodes_w_board.py +++ b/ext/sst/sst/example_riscv_dm_board.py @@ -31,41 +31,8 @@ from sst import UnitAlgebra -cache_link_latency = "1ns" - -bbl = "riscv-boot-exit-nodisk" -cpu_clock_rate = "3GHz" -# gem5 will send requests to physical addresses of range [0x80000000, inf) to memory -# currently, we do not subtract 0x80000000 from the request's address to get the "real" address -# so, the mem_size would always be 2GiB larger than the desired memory size -# memory_size_gem5 = "2GiB" -# memory_size_sst = "4GiB" -# addr_range_end = UnitAlgebra(memory_size_sst).getRoundedValue() - -l1_params = { - "access_latency_cycles" : "1", - "cache_frequency" : cpu_clock_rate, - "replacement_policy" : "lru", - "coherence_protocol" : "MESI", - "associativity" : "4", - "cache_line_size" : "64", - "cache_size" : "32 MiB", - "L1" : "1", -} - -dirNicParams = { - "network_bw" : "25GB/s", - "group" : 1, -} - -def create_cache(name, params = None): - cache = sst.Component(name, "memHierarchy.Cache") - if params is None: - cache.addParams(l1_params) - else: - cache.addParams(params) - return cache - +cache_link_latency = "1ps" +cpu_clock_rate = "4.2GHz" def connect_components(link_name: str, low_port_name: str, low_port_idx: int, high_port_name: str, high_port_idx: int, @@ -84,22 +51,22 @@ def connect_components(link_name: str, # 
=========================================================================== # -# Define the number of gem5 nodes in the system. -system_nodes = 4 +# Define the number of gem5 nodes in the system. anything more than 1 needs +# mpirun to run the sst binary. +system_nodes = 2 # Define the total number of SST Memory nodes memory_nodes = 1 # This example uses fixed number of node size -> 2 GiB -# TODO: Fix this in the later version of the script. # The directory controller decides where the addresses are mapped to. node_memory_slice = "2GiB" remote_memory_slice = "2GiB" # SST memory node size. Each system gets a 2 GiB slice of fixed memory. -sst_memory_size = str((memory_nodes * int(node_memory_slice[0])) + (system_nodes) * 2 + 2) + "GiB" +sst_memory_size = str( + (memory_nodes * int(node_memory_slice[0])) + (system_nodes) * 2 + 2) +"GiB" addr_range_end = UnitAlgebra(sst_memory_size).getRoundedValue() -print(sst_memory_size) # There is one cache bus connecting all gem5 ports to the remote memory. mem_bus = sst.Component("membus", "memHierarchy.Bus") @@ -129,24 +96,20 @@ def connect_components(link_name: str, # Each of the nodes needs to have the initial parameters. We might need to # to supply the instance count to the Gem5 side. This will enable range # adjustments to be made to the DTB File. 
+ node_range = [0x80000000 + (node + 1) * 0x80000000, + 0x80000000 + (node + 2) * 0x80000000] cmd = [ - f"--outdir=m5out_node_ARM_O3_{node}", - # f"--outdir=switch_{node}", - # "../../disaggregated_memory_setup/numa_config_sst_nodes.py", - "../../disaggregated_memory_setup/numa_config_sst_nodes_arm.py", + f"--outdir=m5out_riscv_node_{node}", + "../../disaggregated_memory/configs/riscv-sst-numa-nodes.py", f"--cpu-clock-rate {cpu_clock_rate}", - f"--instance {node}" - - # "--outdir=m5out_{}".format(node), - # "../../configs/example/sst/riscv_fs_node.py", - # "--cpu-clock-rate {}".format(cpu_clock_rate), - # "--memory-size {}".format(node_memory_slice), - # # "--local-memory-size {}".format(node_memory_slice), - # # "--remote-memory-size {}".format(remote_memory_slice), - # "--instance {}".format(node) + "--cpu-type o3", + f"--local-memory-size {node_memory_slice}", + f"--remote-memory-addr-range {node_range[0]},{node_range[1]}", + f"--remote-memory-latency \ + {int(float(cpu_clock_rate[0:cpu_clock_rate.find('G')]) * 250)}" ] ports = { - "remote_memory_port" : "system.remote_memory" + "remote_memory_port" : "board.remote_memory" } port_list = [] for port in ports: @@ -182,14 +145,6 @@ def connect_components(link_name: str, mem_bus, node, port = True) - # directory_caches.append(create_cache("dir_cache_{}".format(node))) - # directory_caches[node].addParams({"network_address" : "2" }) - # Connect the basic components. - # connect_components("node_{}_mem_port_2_bus".format(node), - # memory_ports[node], 0, - # cache_buses[node], node, - # port = True) - # All system nodes are setup. Now create a SST memory. Keep it simplemem for # avoiding extra simulation time. There is only one memory node in SST's side. 
# This will be updated in the future to use number of sst_memory_nodes @@ -202,5 +157,5 @@ def connect_components(link_name: str, # enable Statistics stat_params = { "rate" : "0ns" } sst.setStatisticLoadLevel(10) -sst.setStatisticOutput("sst.statOutputTXT", {"filepath" : "./sst-boards-example-O3.txt"}) +sst.setStatisticOutput("sst.statOutputTXT", {"filepath" : "./riscv-board.txt"}) sst.enableAllStatisticsForAllComponents() diff --git a/ext/sst/sst/example_traffic_gen.py b/ext/sst/sst/example_traffic_gen.py deleted file mode 100644 index 0ad3e10a3a..0000000000 --- a/ext/sst/sst/example_traffic_gen.py +++ /dev/null @@ -1,218 +0,0 @@ -# Copyright (c) 2023 The Regents of the University of California -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer; -# redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution; -# neither the name of the copyright holders nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -# This SST configuration file tests a merlin router. -import sst -import sys -import os - -from sst import UnitAlgebra - -cache_link_latency = "1ns" - -bbl = "riscv-boot-exit-nodisk" -cpu_clock_rate = "1GHz" -# gem5 will send requests to physical addresses of range [0x80000000, inf) to memory -# currently, we do not subtract 0x80000000 from the request's address to get the "real" address -# so, the mem_size would always be 2GiB larger than the desired memory size -# memory_size_gem5 = "2GiB" -# memory_size_sst = "4GiB" -# addr_range_end = UnitAlgebra(memory_size_sst).getRoundedValue() - -l1_params = { - "access_latency_cycles" : "1", - "cache_frequency" : cpu_clock_rate, - "replacement_policy" : "lru", - "coherence_protocol" : "MESI", - "associativity" : "4", - "cache_line_size" : "64", - "cache_size" : "32 MiB", - "L1" : "1", -} - -dirNicParams = { - "network_bw" : "25GB/s", - "group" : 1, -} - -def create_cache(name, params = None): - cache = sst.Component(name, "memHierarchy.Cache") - if params is None: - cache.addParams(l1_params) - else: - cache.addParams(params) - return cache - -def connect_components(link_name: str, - low_port_name: str, low_port_idx: int, - high_port_name: str, high_port_idx: int, - port = False, direct_link = False): - link = sst.Link(link_name) - low_port = "low_network_" + str(low_port_idx) - if port == True: - low_port = "port" - high_port = "high_network_" + str(high_port_idx) - if direct_link == True: - 
high_port = "direct_link" - link.connect( - (low_port_name, low_port, cache_link_latency), - (high_port_name, high_port, cache_link_latency) - ) - -# =========================================================================== # - -# Define the number of gem5 nodes in the system. -system_nodes = 1 - -# Define the total number of SST Memory nodes -memory_nodes = 1 - -# This example uses fixed number of node size -> 2 GiB -# TODO: Fix this in the later version of the script. -# The directory controller decides where the addresses are mapped to. -node_memory_slice = "2GiB" -remote_memory_slice = "2GiB" - -# SST memory node size. Each system gets a 2 GiB slice of fixed memory. -sst_memory_size = str((memory_nodes * int(node_memory_slice[0])) + (system_nodes) * 2 + 2) + "GiB" -addr_range_end = UnitAlgebra(sst_memory_size).getRoundedValue() -print(sst_memory_size) - -# There is one cache bus connecting all gem5 ports to the remote memory. -mem_bus = sst.Component("membus", "memHierarchy.Bus") -mem_bus.addParams( { "bus_frequency" : cpu_clock_rate } ) - -memctrl = sst.Component("memory", "memHierarchy.MemController") -memctrl.setRank(0, 0) -# `addr_range_end` should be changed accordingly to memory_size_sst -memctrl.addParams({ - "debug" : "1", - "clock" : "1GHz", - "request_width" : "64", - "verbose" : 2, - "debug_level" : 10, - "backing" : "none", - "addr_range_end" : addr_range_end, -}) -# memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") -# memory.addParams({ -# "access_time" : "50ns", -# "mem_size" : sst_memory_size -# }) - -memory = memctrl.setSubComponent( "backend", "memHierarchy.timingDRAM") -memory.addParams({ - "id" : 0, - "addrMapper" : "memHierarchy.sandyBridgeAddrMapper", - "addrMapper.interleave_size" : "64B", - "addrMapper.row_size" : "1KiB", - "clock" : "2.4GHz", - "mem_size" : sst_memory_size, - "channels" : 1, - "channel.numRanks" : 2, - "channel.rank.numBanks" : 16, - "channel.transaction_Q_size" : 64, - "channel.rank.bank.CL" : 14, 
- "channel.rank.bank.CL_WR" : 12, - "channel.rank.bank.RCD" : 14, - "channel.rank.bank.TRP" : 14, - "channel.rank.bank.dataCycles" : 2, - "channel.rank.bank.pagePolicy" : "memHierarchy.timeoutPagePolicy", - "channel.rank.bank.transactionQ" : "memHierarchy.reorderTransactionQ", - "channel.rank.bank.pagePolicy.timeoutCycles" : 50, - "printconfig" : 0, - "channel.printconfig" : 0, - "channel.rank.printconfig" : 0, - "channel.rank.bank.printconfig" : 0, -}) - - -# Add all the Gem5 nodes to this list. -gem5_nodes = [] -memory_ports = [] - -# Create each of these nodes and conect it to a SST memory cache -for node in range(system_nodes): - # Each of the nodes needs to have the initial parameters. We might need to - # to supply the instance count to the Gem5 side. This will enable range - # adjustments to be made to the DTB File. - cmd = [ - f"--outdir=traffic_gen_{node}", - "../../configs/example/sst/traffic_gen.py", - f"--cpu-clock-rate {cpu_clock_rate}", - "--memory-size 1GiB" - ] - ports = { - "remote_memory_port" : "system.memory_outgoing_bridge" - } - port_list = [] - for port in ports: - port_list.append(port) - cpu_params = { - "frequency" : cpu_clock_rate, - "cmd" : " ".join(cmd), - "debug_flags" : "", # TrafficGen", - "ports" : " ".join(port_list) - } - # Each of the Gem5 node has to be separately simulated. TODO: Figure out - # this part on the mpirun side. - gem5_nodes.append( - sst.Component("gem5_node_{}".format(node), "gem5.gem5Component") - ) - gem5_nodes[node].addParams(cpu_params) - gem5_nodes[node].setRank(node + 1, 0) - - memory_ports.append( - gem5_nodes[node].setSubComponent( - "remote_memory_port", "gem5.gem5Bridge", 0 - ) - ) - memory_ports[node].addParams({ - "response_receiver_name" : ports["remote_memory_port"] - }) - - # we dont need directory controllers in this example case. The start and - # end ranges does not really matter as the OS is doing this management in - # in this case. 
- connect_components(f"node_{node}_mem_port_2_mem_bus", - memory_ports[node], 0, - mem_bus, node, - port = True) - -# All system nodes are setup. Now create a SST memory. Keep it simplemem for -# avoiding extra simulation time. There is only one memory node in SST's side. -# This will be updated in the future to use number of sst_memory_nodes - -connect_components("membus_2_memory", - mem_bus, 0, - memctrl, 0, - direct_link = True) - -# enable Statistics -stat_params = { "rate" : "0ns" } -sst.setStatisticLoadLevel(10) -sst.setStatisticOutput("sst.statOutputTXT", {"filepath" : "./sst-traffic-example.txt"}) -sst.enableAllStatisticsForAllComponents() From 1afc0a8f755e984b7164731e72f91e47a05de596 Mon Sep 17 00:00:00 2001 From: Kaustav Goswami Date: Sun, 26 Nov 2023 16:22:41 -0800 Subject: [PATCH 11/23] configs: updated numactl calls This change updates the numactl calls to correctly allocate memory in the local and the remote memory nodes. Signed-off-by: Kaustav Goswami --- disaggregated_memory/configs/arm-gem5-numa-nodes.py | 11 +++++++---- disaggregated_memory/configs/arm-sst-numa-nodes.py | 9 ++++++--- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/disaggregated_memory/configs/arm-gem5-numa-nodes.py b/disaggregated_memory/configs/arm-gem5-numa-nodes.py index dd1d9a0b1d..4e1fa4beaf 100644 --- a/disaggregated_memory/configs/arm-gem5-numa-nodes.py +++ b/disaggregated_memory/configs/arm-gem5-numa-nodes.py @@ -71,7 +71,7 @@ def RemoteDualChannelDDR4_2400( """ return RemoteChanneledMemory( DDR4_2400_8x8, - 1, + 2, 64, size=size, remote_offset_latency=remote_offset_latency, @@ -108,18 +108,21 @@ def RemoteDualChannelDDR4_2400( "mount -t proc - /proc;", "numastat;", "m5 dumpresetstats 0 ;", - "numactl --cpubind=0 --membind=0 -- " + + "numactl --membind=0 -- " + "/home/ubuntu/simple-vectorizable-microbenchmarks/stream/stream.hw " + "1000000;", "m5 dumpresetstats 0;", - "numactl --cpubind=0 --membind=0,1 -- " + + "numastat;", + "numactl --interleave=0,1 -- " + 
"/home/ubuntu/simple-vectorizable-microbenchmarks/stream/stream.hw " + "1000000;", "m5 dumpresetstats 0;", - "numactl --cpubind=0 --membind=1 -- " + + "numastat;", + "numactl --membind=1 -- " + "/home/ubuntu/simple-vectorizable-microbenchmarks/stream/stream.hw " + "1000000;", "m5 dumpresetstats 0;", + "numastat;", "m5 exit;", ] board.set_kernel_disk_workload( diff --git a/disaggregated_memory/configs/arm-sst-numa-nodes.py b/disaggregated_memory/configs/arm-sst-numa-nodes.py index 353a219c95..1c96ba4d70 100644 --- a/disaggregated_memory/configs/arm-sst-numa-nodes.py +++ b/disaggregated_memory/configs/arm-sst-numa-nodes.py @@ -136,18 +136,21 @@ "mount -t proc - /proc;", "numastat;", "m5 dumpresetstats 0 ;", - "numactl --cpubind=0 --membind=0 -- " + + "numactl --membind=0 -- " + "/home/ubuntu/simple-vectorizable-microbenchmarks/stream/stream.hw " + "1000000;", "m5 dumpresetstats 0;", - "numactl --cpubind=0 --membind=0,1 -- " + + "numastat;", + "numactl --interleave=0,1 -- " + "/home/ubuntu/simple-vectorizable-microbenchmarks/stream/stream.hw " + "1000000;", "m5 dumpresetstats 0;", - "numactl --cpubind=0 --membind=1 -- " + + "numastat;", + "numactl --membind=1 -- " + "/home/ubuntu/simple-vectorizable-microbenchmarks/stream/stream.hw " + "1000000;", "m5 dumpresetstats 0;", + "numastat;", "m5 exit;", ] From a5051ac82c7bd85c4491f3f2998d7c6dd7e617c5 Mon Sep 17 00:00:00 2001 From: Kaustav Goswami Date: Fri, 1 Dec 2023 21:54:48 +0000 Subject: [PATCH 12/23] ext,base-stats: add stats to the gem5 SST bridge This change adds stats counters to the gem5 SST external bridge to monitor the number and size of packets going out and coming into gem5. This numbers should also be reflected in SST's stats output file. 
Signed-off-by: Kaustav Goswami --- src/sst/outgoing_request_bridge.cc | 53 +++++++++++++++++++++++++----- src/sst/outgoing_request_bridge.hh | 29 ++++++++++++++-- 2 files changed, 71 insertions(+), 11 deletions(-) diff --git a/src/sst/outgoing_request_bridge.cc b/src/sst/outgoing_request_bridge.cc index 7bfcd34e26..12333a7fa0 100644 --- a/src/sst/outgoing_request_bridge.cc +++ b/src/sst/outgoing_request_bridge.cc @@ -30,6 +30,7 @@ #include #include +#include "sim/stats.hh" #include "base/trace.hh" namespace gem5 @@ -38,6 +39,7 @@ namespace gem5 OutgoingRequestBridge::OutgoingRequestBridge( const OutgoingRequestBridgeParams &params) : SimObject(params), + stats(this), outgoingPort(std::string(name()), this), sstResponder(nullptr), physicalAddressRanges(params.physical_address_ranges.begin(), @@ -63,6 +65,7 @@ OutgoingRequestPort::~OutgoingRequestPort() { } + void OutgoingRequestBridge::init() { @@ -97,7 +100,14 @@ OutgoingRequestBridge::setResponder(SSTResponderInterface* responder) bool OutgoingRequestBridge::sendTimingResp(gem5::PacketPtr pkt) { - return outgoingPort.sendTimingResp(pkt); + // see if the responder responded true or false. if it's true, then we + // increment the stats counters. + bool return_status = outgoingPort.sendTimingResp(pkt); + if (return_status == true) { + ++stats.numIncomingPackets; + stats.sizeIncomingPackets += pkt->getSize(); + } + return return_status; } void @@ -117,10 +127,16 @@ OutgoingRequestBridge::getInitPhaseStatus() { void OutgoingRequestBridge::handleRecvFunctional(PacketPtr pkt) { + // This should not receive any functional accesses + // gem5::MemCmd::Command pktCmd = (gem5::MemCmd::Command)pkt->cmd.toInt(); + // std::cout << "Recv Functional : 0x" << std::hex << pkt->getAddr() << + // std::dec << " " << pktCmd << " " << gem5::MemCmd::WriteReq << " " << + // getInitPhaseStatus() << std::endl; // Check at which stage are we at. If we are at INIT phase, then queue all // these packets. 
if (!getInitPhaseStatus()) { + // sstResponder->recvAtomic(pkt); uint8_t* ptr = pkt->getPtr(); uint64_t size = pkt->getSize(); std::vector data(ptr, ptr+size); @@ -133,12 +149,9 @@ OutgoingRequestBridge::handleRecvFunctional(PacketPtr pkt) // These packets have to translated at runtime. We convert these // packets to timing as its data has to be stored correctly in SST // memory. Otherwise reads from the SST memory will fail. To reproduce - // this error, do not handle any functional accesses and the kernel + // this error, don not handle any functional accesses and the kernel // boot will fail while reading the correct partition from the vda - // device. this is a hacky solution to solve functional accesses in the - // gem5 sst bridge. there are instances where the vda device will not - // work correctly. to reproduce errors, use 8 O3 CPUs accessing the - // same SST memory across 16 or 32 instances of gem5. + // device. sstResponder->handleRecvFunctional(pkt); } } @@ -147,6 +160,7 @@ Tick OutgoingRequestBridge:: OutgoingRequestPort::recvAtomic(PacketPtr pkt) { + // return 0; assert(false && "OutgoingRequestPort::recvAtomic not implemented"); return Tick(); } @@ -162,8 +176,19 @@ bool OutgoingRequestBridge:: OutgoingRequestPort::recvTimingReq(PacketPtr pkt) { - owner->sstResponder->handleRecvTimingReq(pkt); - return true; + return owner->handleTiming(pkt); +} + +bool OutgoingRequestBridge::handleTiming(PacketPtr pkt) +{ + // see if the responder responded true or false. if it's true, then we + // increment the stats counters. 
+ bool return_status = sstResponder->handleRecvTimingReq(pkt); + if(ret == true) { + ++stats.numOutgoingPackets; + stats.sizeOutgoingPackets += pkt->getSize(); + } + return return_status; } void @@ -180,4 +205,16 @@ OutgoingRequestPort::getAddrRanges() const return owner->physicalAddressRanges; } +OutgoingRequestBridge::StatGroup::StatGroup(statistics::Group *parent) + : statistics::Group(parent), + ADD_STAT(numOutgoingPackets, statistics::units::Count::get(), + "Number of packets going out of the gem5 port"), + ADD_STAT(sizeOutgoingPackets, statistics::units::Byte::get(), + "Cumulative size of all the outgoing packets"), + ADD_STAT(numIncomingPackets, statistics::units::Count::get(), + "Number of packets coming into the gem5 port"), + ADD_STAT(sizeIncomingPackets, statistics::units::Byte::get(), + "Cumulative size of all the incoming packets") +{ +} }; // namespace gem5 diff --git a/src/sst/outgoing_request_bridge.hh b/src/sst/outgoing_request_bridge.hh index daef2cc96f..dfb2a60dbc 100644 --- a/src/sst/outgoing_request_bridge.hh +++ b/src/sst/outgoing_request_bridge.hh @@ -30,6 +30,7 @@ #include #include +#include "base/statistics.hh" #include "mem/port.hh" #include "params/OutgoingRequestBridge.hh" #include "sim/sim_object.hh" @@ -53,6 +54,7 @@ namespace gem5 class OutgoingRequestBridge: public SimObject { + public: class OutgoingRequestPort: public ResponsePort { @@ -81,6 +83,24 @@ class OutgoingRequestBridge: public SimObject // the memory. bool init_phase_bool; + public: + // we need a statistics counter for this simobject to find out how many + // requests were sent to or received from the outgoing port. 
+ struct StatGroup : public statistics::Group + { + StatGroup(statistics::Group *parent); + /** Count the number of outgoing packets */ + statistics::Scalar numOutgoingPackets; + + + /** Cumulative size of the all outgoing packets */ + statistics::Scalar sizeOutgoingPackets; + + /** Count the number of incoming packets */ + statistics::Scalar numIncomingPackets; + /** Cumulative size of all the incoming packets */ + statistics::Scalar sizeIncomingPackets; + } stats; public: // a gem5 ResponsePort OutgoingRequestPort outgoingPort; @@ -97,7 +117,8 @@ class OutgoingRequestBridge: public SimObject // Required to let the OutgoingRequestPort to send range change request. void init(); - + + bool handleTiming(PacketPtr pkt); // Returns the range of addresses that the ports will handle. // Currently, it will return the range of [0x80000000, inf), which is // specific to RISCV (SiFive's HiFive boards). @@ -118,9 +139,9 @@ class OutgoingRequestBridge: public SimObject void initPhaseComplete(bool value); // We read the value of the init_phase_bool using `getInitPhaseStatus` - // method. This methids will be used later to swap memory ports. - bool getInitPhaseStatus(); + // method. + bool getInitPhaseStatus(); // gem5 Component (from SST) will call this function to let set the // bridge's corresponding SSTResponderSubComponent (which implemented // SSTResponderInterface). I.e., this will connect this bridge to the @@ -137,6 +158,8 @@ class OutgoingRequestBridge: public SimObject // to SST. Should only be called during the SST construction phase, i.e. // not at the simulation time. void handleRecvFunctional(PacketPtr pkt); + + }; }; // namespace gem5 From 57edb4b6521ba7a5428a4939618dac25567aab02 Mon Sep 17 00:00:00 2001 From: Kaustav Goswami Date: Fri, 1 Dec 2023 14:11:30 -0800 Subject: [PATCH 13/23] configs,stdlib: added a private l1, l2 and shared l3 cache This change adds a private l1 private l2 and shared l3 cache hierarchy as a stdlib component. 
Signed-off-by: Kaustav Goswami --- .../cachehierarchies/dm_caches.py | 100 ++++++++++- .../cachehierarchies/dm_caches_sst.py | 96 ++++++++++- ...l1_private_l2_shared_l3_cache_hierarchy.py | 162 ++++++++++++++++++ 3 files changed, 356 insertions(+), 2 deletions(-) create mode 100644 disaggregated_memory/cachehierarchies/private_l1_private_l2_shared_l3_cache_hierarchy.py diff --git a/disaggregated_memory/cachehierarchies/dm_caches.py b/disaggregated_memory/cachehierarchies/dm_caches.py index c016d183a8..40fc96ca1d 100644 --- a/disaggregated_memory/cachehierarchies/dm_caches.py +++ b/disaggregated_memory/cachehierarchies/dm_caches.py @@ -33,10 +33,108 @@ from gem5.components.cachehierarchies.classic.caches.mmu_cache import MMUCache from gem5.components.boards.abstract_board import AbstractBoard from gem5.isas import ISA -from m5.objects import Cache, L2XBar, BaseXBar, SystemXBar, BadAddr, Port +from m5.objects import L2XBar from gem5.utils.override import overrides +from cachehierarchies.private_l1_private_l2_shared_l3_cache_hierarchy import ( + PrivateL1PrivateL2SharedL3CacheHierarchy) + +class ClassicPrivateL1PrivateL2SharedL3DMCache( + PrivateL1PrivateL2SharedL3CacheHierarchy): + def __init__( + self, + l1d_size: str, + l1i_size: str, + l2_size: str, + l3_size: str, + l3_assoc: int = 16 + ): + super().__init__( + l1d_size=l1d_size, + l1i_size=l1i_size, + l2_size=l2_size, + l3_size=l3_size, + l3_assoc=l3_assoc + ) + + @overrides(PrivateL1PrivateL2SharedL3CacheHierarchy) + def incorporate_cache(self, board: AbstractBoard) -> None: + + # Set up the system port for functional access from the simulator. 
+ board.connect_system_port(self.membus.cpu_side_ports) + + for cntr in board.get_local_memory().get_memory_controllers(): + cntr.port = self.membus.mem_side_ports + + for cntr in board.get_remote_memory().get_memory_controllers(): + cntr.port = self.membus.mem_side_ports + + self.l1icaches = [ + L1ICache(size=self._l1i_size) + for i in range(board.get_processor().get_num_cores()) + ] + self.l1dcaches = [ + L1DCache(size=self._l1d_size) + for i in range(board.get_processor().get_num_cores()) + ] + self.l2buses = [ + L2XBar() for i in range(board.get_processor().get_num_cores()) + ] + self.l2caches = [ + L2Cache(size=self._l2_size) + for i in range(board.get_processor().get_num_cores()) + ] + self.l3cache = L2Cache(size=self._l3_size, + assoc=self._l3_assoc, + tag_latency=self._l3_tag_latency, + data_latency=self._l3_data_latency, + response_latency=self._l3_response_latency, + mshrs=self._l3_mshrs, + tgts_per_mshr=self._l3_tgts_per_mshr) + # There is only one l3 bus, which connects l3 to the membus + self.l3bus = L2XBar() + # ITLB Page walk caches + self.iptw_caches = [ + MMUCache(size="8KiB") + for _ in range(board.get_processor().get_num_cores()) + ] + # DTLB Page walk caches + self.dptw_caches = [ + MMUCache(size="8KiB") + for _ in range(board.get_processor().get_num_cores()) + ] + + if board.has_coherent_io(): + self._setup_io_cache(board) + + for i, cpu in enumerate(board.get_processor().get_cores()): + + cpu.connect_icache(self.l1icaches[i].cpu_side) + cpu.connect_dcache(self.l1dcaches[i].cpu_side) + + self.l1icaches[i].mem_side = self.l2buses[i].cpu_side_ports + self.l1dcaches[i].mem_side = self.l2buses[i].cpu_side_ports + self.iptw_caches[i].mem_side = self.l2buses[i].cpu_side_ports + self.dptw_caches[i].mem_side = self.l2buses[i].cpu_side_ports + + self.l2buses[i].mem_side_ports = self.l2caches[i].cpu_side + + self.l2caches[i].mem_side = self.l3bus.cpu_side_ports + + cpu.connect_walker_ports( + self.iptw_caches[i].cpu_side, self.dptw_caches[i].cpu_side + ) 
+ + if board.get_processor().get_isa() == ISA.X86: + int_req_port = self.membus.mem_side_ports + int_resp_port = self.membus.cpu_side_ports + cpu.connect_interrupt(int_req_port, int_resp_port) + else: + cpu.connect_interrupt() + self.l3bus.mem_side_ports = self.l3cache.cpu_side + self.membus.cpu_side_ports = self.l3cache.mem_side + class ClassicPrivateL1PrivateL2DMCache(PrivateL1PrivateL2CacheHierarchy): def __init__( diff --git a/disaggregated_memory/cachehierarchies/dm_caches_sst.py b/disaggregated_memory/cachehierarchies/dm_caches_sst.py index 9c7a99a909..00edf5d69e 100644 --- a/disaggregated_memory/cachehierarchies/dm_caches_sst.py +++ b/disaggregated_memory/cachehierarchies/dm_caches_sst.py @@ -33,10 +33,104 @@ from gem5.components.cachehierarchies.classic.caches.mmu_cache import MMUCache from gem5.components.boards.abstract_board import AbstractBoard from gem5.isas import ISA -from m5.objects import Cache, L2XBar, BaseXBar, SystemXBar, BadAddr, Port +from m5.objects import L2XBar from gem5.utils.override import overrides +from cachehierarchies.private_l1_private_l2_shared_l3_cache_hierarchy import ( + PrivateL1PrivateL2SharedL3CacheHierarchy) + +class ClassicPrivateL1PrivateL2SharedL3SstDMCache( + PrivateL1PrivateL2SharedL3CacheHierarchy): + def __init__( + self, + l1d_size: str, + l1i_size: str, + l2_size: str, + l3_size: str, + l3_assoc: int = 16 + ): + super().__init__( + l1d_size=l1d_size, + l1i_size=l1i_size, + l2_size=l2_size, + l3_size=l3_size, + l3_assoc=l3_assoc + ) + + @overrides(PrivateL1PrivateL2SharedL3CacheHierarchy) + def incorporate_cache(self, board: AbstractBoard) -> None: + + # Set up the system port for functional access from the simulator. 
+ board.connect_system_port(self.membus.cpu_side_ports) + + for cntr in board.get_local_memory().get_memory_controllers(): + cntr.port = self.membus.mem_side_ports + + self.l1icaches = [ + L1ICache(size=self._l1i_size) + for i in range(board.get_processor().get_num_cores()) + ] + self.l1dcaches = [ + L1DCache(size=self._l1d_size) + for i in range(board.get_processor().get_num_cores()) + ] + self.l2buses = [ + L2XBar() for i in range(board.get_processor().get_num_cores()) + ] + self.l2caches = [ + L2Cache(size=self._l2_size) + for i in range(board.get_processor().get_num_cores()) + ] + self.l3cache = L2Cache(size=self._l3_size, + assoc=self._l3_assoc, + tag_latency=self._l3_tag_latency, + data_latency=self._l3_data_latency, + response_latency=self._l3_response_latency, + mshrs=self._l3_mshrs, + tgts_per_mshr=self._l3_tgts_per_mshr) + # There is only one l3 bus, which connects l3 to the membus + self.l3bus = L2XBar() + # ITLB Page walk caches + self.iptw_caches = [ + MMUCache(size="8KiB") + for _ in range(board.get_processor().get_num_cores()) + ] + # DTLB Page walk caches + self.dptw_caches = [ + MMUCache(size="8KiB") + for _ in range(board.get_processor().get_num_cores()) + ] + + if board.has_coherent_io(): + self._setup_io_cache(board) + + for i, cpu in enumerate(board.get_processor().get_cores()): + + cpu.connect_icache(self.l1icaches[i].cpu_side) + cpu.connect_dcache(self.l1dcaches[i].cpu_side) + + self.l1icaches[i].mem_side = self.l2buses[i].cpu_side_ports + self.l1dcaches[i].mem_side = self.l2buses[i].cpu_side_ports + self.iptw_caches[i].mem_side = self.l2buses[i].cpu_side_ports + self.dptw_caches[i].mem_side = self.l2buses[i].cpu_side_ports + + self.l2buses[i].mem_side_ports = self.l2caches[i].cpu_side + + self.l2caches[i].mem_side = self.l3bus.cpu_side_ports + + cpu.connect_walker_ports( + self.iptw_caches[i].cpu_side, self.dptw_caches[i].cpu_side + ) + + if board.get_processor().get_isa() == ISA.X86: + int_req_port = self.membus.mem_side_ports + 
int_resp_port = self.membus.cpu_side_ports + cpu.connect_interrupt(int_req_port, int_resp_port) + else: + cpu.connect_interrupt() + self.l3bus.mem_side_ports = self.l3cache.cpu_side + self.membus.cpu_side_ports = self.l3cache.mem_side class ClassicPrivateL1PrivateL2SstDMCache(PrivateL1PrivateL2CacheHierarchy): def __init__( diff --git a/disaggregated_memory/cachehierarchies/private_l1_private_l2_shared_l3_cache_hierarchy.py b/disaggregated_memory/cachehierarchies/private_l1_private_l2_shared_l3_cache_hierarchy.py new file mode 100644 index 0000000000..db674460ba --- /dev/null +++ b/disaggregated_memory/cachehierarchies/private_l1_private_l2_shared_l3_cache_hierarchy.py @@ -0,0 +1,162 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from gem5.components.cachehierarchies.classic.caches.l1dcache import L1DCache +from gem5.components.cachehierarchies.classic.caches.l1icache import L1ICache +from gem5.components.cachehierarchies.classic.caches.l2cache import L2Cache +from gem5.components.cachehierarchies.classic.caches.mmu_cache import MMUCache +from gem5.components.cachehierarchies.classic.private_l1_private_l2_cache_hierarchy import PrivateL1PrivateL2CacheHierarchy +from gem5.components.boards.abstract_board import AbstractBoard +from gem5.isas import ISA + +from m5.objects import ( + Cache, + L2XBar, + BaseXBar, + SystemXBar, + BadAddr, + Port +) + +from gem5.utils.override import overrides + + +class PrivateL1PrivateL2SharedL3CacheHierarchy( + PrivateL1PrivateL2CacheHierarchy): + """ + A cache setup where each core has a private L1 Data and Instruction Cache, + and a private L2 cache. + """ + + def __init__( + self, + l1d_size: str, + l1i_size: str, + l2_size: str, + l3_size: str, + l3_assoc: int = 16 + ) -> None: + """ + :param l1d_size: The size of the L1 Data Cache (e.g., "32kB"). + :type l1d_size: str + :param l1i_size: The size of the L1 Instruction Cache (e.g., "32kB"). + :type l1i_size: str + :param l2_size: The size of the L2 Cache (e.g., "256kB"). + :type l2_size: str + :param membus: The memory bus. This parameter is optional parameter and + will default to a 64 bit width SystemXBar is not specified. 
+ + :type membus: BaseXBar + """ + super().__init__( + l1d_size=l1d_size, + l1i_size=l1i_size, + l2_size=l2_size + ) + + self._l3_size = l3_size + self._l3_assoc = l3_assoc + self._l3_tag_latency = 20 + self._l3_data_latency = 20 + self._l3_response_latency = 40 + self._l3_mshrs = 32 + self._l3_tgts_per_mshr = 12 + + + @overrides(PrivateL1PrivateL2CacheHierarchy) + def incorporate_cache(self, board: AbstractBoard) -> None: + + # Set up the system port for functional access from the simulator. + board.connect_system_port(self.membus.cpu_side_ports) + + for _, port in board.get_memory().get_mem_ports(): + self.membus.mem_side_ports = port + + self.l1icaches = [ + L1ICache(size=self._l1i_size) + for i in range(board.get_processor().get_num_cores()) + ] + self.l1dcaches = [ + L1DCache(size=self._l1d_size) + for i in range(board.get_processor().get_num_cores()) + ] + self.l2buses = [ + L2XBar() for i in range(board.get_processor().get_num_cores()) + ] + self.l2caches = [ + L2Cache(size=self._l2_size) + for i in range(board.get_processor().get_num_cores()) + ] + self.l3cache = L2Cache(size=self._l3_size, + assoc=self._l3_assoc, + tag_latency=self._l3_tag_latency, + data_latency=self._l3_data_latency, + response_latency=self._l3_response_latency, + mshrs=self._l3_mshrs, + tgts_per_mshr=self._l3_tgts_per_mshr) + # There is only one l3 bus, which connects l3 to the membus + self.l3bus = L2XBar() + # ITLB Page walk caches + self.iptw_caches = [ + MMUCache(size="8KiB") + for _ in range(board.get_processor().get_num_cores()) + ] + # DTLB Page walk caches + self.dptw_caches = [ + MMUCache(size="8KiB") + for _ in range(board.get_processor().get_num_cores()) + ] + + if board.has_coherent_io(): + self._setup_io_cache(board) + + for i, cpu in enumerate(board.get_processor().get_cores()): + + cpu.connect_icache(self.l1icaches[i].cpu_side) + cpu.connect_dcache(self.l1dcaches[i].cpu_side) + + self.l1icaches[i].mem_side = self.l2buses[i].cpu_side_ports + self.l1dcaches[i].mem_side = 
self.l2buses[i].cpu_side_ports + self.iptw_caches[i].mem_side = self.l2buses[i].cpu_side_ports + self.dptw_caches[i].mem_side = self.l2buses[i].cpu_side_ports + + self.l2buses[i].mem_side_ports = self.l2caches[i].cpu_side + + self.l2caches[i].mem_side = self.l3bus.cpu_side_ports + + cpu.connect_walker_ports( + self.iptw_caches[i].cpu_side, self.dptw_caches[i].cpu_side + ) + + if board.get_processor().get_isa() == ISA.X86: + int_req_port = self.membus.mem_side_ports + int_resp_port = self.membus.cpu_side_ports + cpu.connect_interrupt(int_req_port, int_resp_port) + else: + cpu.connect_interrupt() + self.l3bus.mem_side_ports = self.l3cache.cpu_side + self.membus.cpu_side_ports = self.l3cache.mem_side + From 18fe2ee8d8d9283bb8482224140ac2f84690b305 Mon Sep 17 00:00:00 2001 From: Kaustav Goswami Date: Sat, 2 Dec 2023 01:53:45 +0000 Subject: [PATCH 14/23] configs: updated disaggregated memory scripts This change updates the disaggregated memory scripts to use a 3-level cache and dumps starts after each STREAM kernel. 
Signed-off-by: Kaustav Goswami --- .../configs/arm-gem5-numa-nodes.py | 24 +++++------ .../configs/arm-sst-numa-nodes.py | 42 ++++++++++--------- 2 files changed, 34 insertions(+), 32 deletions(-) diff --git a/disaggregated_memory/configs/arm-gem5-numa-nodes.py b/disaggregated_memory/configs/arm-gem5-numa-nodes.py index 4e1fa4beaf..a36ce52f5d 100644 --- a/disaggregated_memory/configs/arm-gem5-numa-nodes.py +++ b/disaggregated_memory/configs/arm-gem5-numa-nodes.py @@ -43,7 +43,7 @@ from m5.objects import Root from boards.arm_gem5_board import ArmGem5DMBoard -from cachehierarchies.dm_caches import ClassicPrivateL1PrivateL2DMCache +from cachehierarchies.dm_caches import ClassicPrivateL1PrivateL2SharedL3DMCache from memories.remote_memory import RemoteChanneledMemory from gem5.utils.requires import requires from gem5.components.memory.dram_interfaces.ddr4 import DDR4_2400_8x8 @@ -78,8 +78,8 @@ def RemoteDualChannelDDR4_2400( ) # Here we setup the parameters of the l1 and l2 caches. -cache_hierarchy = ClassicPrivateL1PrivateL2DMCache( - l1d_size="32KiB", l1i_size="32KiB", l2_size="1MB" +cache_hierarchy = ClassicPrivateL1PrivateL2SharedL3DMCache( + l1d_size="32KiB", l1i_size="32KiB", l2_size="256KiB", l3_size="1MiB" ) # Memory: Dual Channel DDR4 2400 DRAM device. 
local_memory = DualChannelDDR4_2400(size="1GiB") @@ -103,28 +103,26 @@ def RemoteDualChannelDDR4_2400( remote_memory=remote_memory, cache_hierarchy=cache_hierarchy, ) + cmd = [ "mount -t sysfs - /sys;", "mount -t proc - /proc;", "numastat;", - "m5 dumpresetstats 0 ;", "numactl --membind=0 -- " + - "/home/ubuntu/simple-vectorizable-microbenchmarks/stream/stream.hw " + - "1000000;", - "m5 dumpresetstats 0;", + "/home/ubuntu/simple-vectorizable-microbenchmarks/stream-annotated/" + + "stream.hw.m5 1000000;", "numastat;", "numactl --interleave=0,1 -- " + - "/home/ubuntu/simple-vectorizable-microbenchmarks/stream/stream.hw " + - "1000000;", - "m5 dumpresetstats 0;", + "/home/ubuntu/simple-vectorizable-microbenchmarks/stream-annotated/" + + "stream.hw.m5 1000000;", "numastat;", "numactl --membind=1 -- " + - "/home/ubuntu/simple-vectorizable-microbenchmarks/stream/stream.hw " + - "1000000;", - "m5 dumpresetstats 0;", + "/home/ubuntu/simple-vectorizable-microbenchmarks/stream-annotated/" + + "stream.hw.m5 1000000;", "numastat;", "m5 exit;", ] + board.set_kernel_disk_workload( kernel=CustomResource("/home/kaustavg/vmlinux-5.4.49-NUMA.arm64"), bootloader=CustomResource( diff --git a/disaggregated_memory/configs/arm-sst-numa-nodes.py b/disaggregated_memory/configs/arm-sst-numa-nodes.py index 1c96ba4d70..8e2414f51f 100644 --- a/disaggregated_memory/configs/arm-sst-numa-nodes.py +++ b/disaggregated_memory/configs/arm-sst-numa-nodes.py @@ -46,10 +46,12 @@ from m5.objects import Root, AddrRange from boards.arm_sst_board import ArmSstDMBoard -from cachehierarchies.dm_caches_sst import ClassicPrivateL1PrivateL2SstDMCache +from cachehierarchies.dm_caches_sst import ( + ClassicPrivateL1PrivateL2SharedL3SstDMCache +) from memories.external_remote_memory import ExternalRemoteMemoryInterface from gem5.utils.requires import requires -from gem5.components.memory import DualChannelDDR4_2400 +from gem5.components.memory import SingleChannelDDR4_2400, DualChannelDDR4_2400 from 
gem5.components.processors.simple_processor import SimpleProcessor from gem5.components.processors.cpu_types import CPUTypes from gem5.isas import ISA @@ -98,34 +100,39 @@ "atomic": CPUTypes.ATOMIC, "timing": CPUTypes.TIMING}[args.cpu_type] -remote_memory_range = list(map(int, args.remote_memory_range.split(","))) +remote_memory_range = list(map(int, args.remote_memory_addr_range.split(","))) remote_memory_range = AddrRange(remote_memory_range[0], remote_memory_range[1]) # This runs a check to ensure the gem5 binary is compiled for RISCV. requires(isa_required=ISA.ARM) # Here we setup the parameters of the l1 and l2 caches. -cache_hierarchy = ClassicPrivateL1PrivateL2SstDMCache( - l1d_size="32KiB", l1i_size="32KiB", l2_size="1MB" +cache_hierarchy = ClassicPrivateL1PrivateL2SharedL3SstDMCache( + l1d_size="32KiB", l1i_size="32KiB", l2_size="256KiB", l3_size="1MiB" ) # Memory: Dual Channel DDR4 2400 DRAM device. -local_memory = DualChannelDDR4_2400(size=args.local_memory_range) + +local_memory = SingleChannelDDR4_2400(size=args.local_memory_size) + # Either suppy the size of the remote memory or the address range of the # remote memory. Since this is inside the external memory, it does not matter # what type of memory is being simulated. This can either be initialized with # a size or a memory address range, which is mroe flexible. Adding remote # memory latency automatically adds a non-coherent crossbar to simulate latenyc + remote_memory = ExternalRemoteMemoryInterface( addr_range=remote_memory_range, remote_memory_latency=args.remote_memory_latency ) + # Here we setup the processor. We use a simple processor. processor = SimpleProcessor( - cpu_type=CPUTypes.TIMING, isa=ISA.ARM, num_cores=1 + cpu_type=CPUTypes.O3, isa=ISA.ARM, num_cores=4 ) + # Here we setup the board which allows us to do Full-System ARM simulations. 
board = ArmSstDMBoard( clk_freq=args.cpu_clock_rate, - processor=cpu_type, + processor=processor, local_memory=local_memory, remote_memory=remote_memory, cache_hierarchy=cache_hierarchy, @@ -135,21 +142,17 @@ "mount -t sysfs - /sys;", "mount -t proc - /proc;", "numastat;", - "m5 dumpresetstats 0 ;", "numactl --membind=0 -- " + - "/home/ubuntu/simple-vectorizable-microbenchmarks/stream/stream.hw " + - "1000000;", - "m5 dumpresetstats 0;", + "/home/ubuntu/simple-vectorizable-microbenchmarks/stream-annotated/" + + "stream.hw.m5 1000000;", "numastat;", "numactl --interleave=0,1 -- " + - "/home/ubuntu/simple-vectorizable-microbenchmarks/stream/stream.hw " + - "1000000;", - "m5 dumpresetstats 0;", + "/home/ubuntu/simple-vectorizable-microbenchmarks/stream-annotated/" + + "stream.hw.m5 1000000;", "numastat;", "numactl --membind=1 -- " + - "/home/ubuntu/simple-vectorizable-microbenchmarks/stream/stream.hw " + - "1000000;", - "m5 dumpresetstats 0;", + "/home/ubuntu/simple-vectorizable-microbenchmarks/stream-annotated/" + + "stream.hw.m5 1000000;", "numastat;", "m5 exit;", ] @@ -160,7 +163,8 @@ "kernel" : CustomResource("/home/kaustavg/vmlinux-5.4.49-NUMA.arm64"), "bootloader" : CustomResource("/home/kaustavg/.cache/gem5/arm64-bootloader-foundation"), "disk_image" : DiskImageResource( - local_path="/projects/gem5/hn/DISK_IMAGES/arm64sve-hpc-2204-20230526-numa.img", + "/home/kaustavg/disk-images/arm/arm64sve-hpc-2204-20230526-numa.img", + # local_path="/projects/gem5/hn/DISK_IMAGES/arm64sve-hpc-2204-20230526-numa.img", root_partition="1", ), "readfile_contents" : " ".join(cmd) From 8aabc15869db673f8d5314708c0645124bc11d60 Mon Sep 17 00:00:00 2001 From: Kaustav Goswami Date: Sat, 2 Dec 2023 02:05:29 +0000 Subject: [PATCH 15/23] ext,base-stats: minor fix in the stat counter This commit fixes a compilation error on the gem5 SST bridge. 
Signed-off-by: Kaustav Goswami --- src/sst/outgoing_request_bridge.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sst/outgoing_request_bridge.cc b/src/sst/outgoing_request_bridge.cc index 12333a7fa0..0685adad1d 100644 --- a/src/sst/outgoing_request_bridge.cc +++ b/src/sst/outgoing_request_bridge.cc @@ -184,7 +184,7 @@ bool OutgoingRequestBridge::handleTiming(PacketPtr pkt) // see if the responder responded true or false. if it's true, then we // increment the stats counters. bool return_status = sstResponder->handleRecvTimingReq(pkt); - if(ret == true) { + if(return_status == true) { ++stats.numOutgoingPackets; stats.sizeOutgoingPackets += pkt->getSize(); } From 24dfc0af52cc4b8a641731669613632a26fcbcef Mon Sep 17 00:00:00 2001 From: Kaustav Goswami Date: Fri, 1 Dec 2023 18:28:18 -0800 Subject: [PATCH 16/23] ext: fixed repeated lines of code This patch fixes the previous git merge that duplicated a few lines in the ext library of sst. Signed-off-by: Kaustav Goswami --- ext/sst/gem5.hh | 6 ------ 1 file changed, 6 deletions(-) diff --git a/ext/sst/gem5.hh b/ext/sst/gem5.hh index 3e08686f5e..7f4ea8850f 100644 --- a/ext/sst/gem5.hh +++ b/ext/sst/gem5.hh @@ -117,12 +117,6 @@ class gem5Component: public SST::Component std::vector sstPortNames; int sstPortCount; - // We need a list of incoming port names so that we don't need to recompile - // everytime when we add a new OutgoingBridge from python. - std::vector sstPorts; - std::vector sstPortNames; - int sstPortCount; - void initPython(int argc, char **argv); void splitCommandArgs(std::string &cmd, std::vector &args); void splitPortNames(std::string port_names); From 167bf8db08163c9e7fc36e358c68aded10dfdfa7 Mon Sep 17 00:00:00 2001 From: Kaustav Goswami Date: Sat, 2 Dec 2023 02:05:29 +0000 Subject: [PATCH 17/23] ext,base-stats: minor fix in the stat counter This commit fixes a compilation error on the gem5 SST bridge. 
Signed-off-by: Kaustav Goswami --- src/sst/outgoing_request_bridge.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sst/outgoing_request_bridge.cc b/src/sst/outgoing_request_bridge.cc index 12333a7fa0..0685adad1d 100644 --- a/src/sst/outgoing_request_bridge.cc +++ b/src/sst/outgoing_request_bridge.cc @@ -184,7 +184,7 @@ bool OutgoingRequestBridge::handleTiming(PacketPtr pkt) // see if the responder responded true or false. if it's true, then we // increment the stats counters. bool return_status = sstResponder->handleRecvTimingReq(pkt); - if(ret == true) { + if(return_status == true) { ++stats.numOutgoingPackets; stats.sizeOutgoingPackets += pkt->getSize(); } From e2610b390e9cabc1eded97cbfb721cf47dbd57b4 Mon Sep 17 00:00:00 2001 From: Kaustav Goswami Date: Fri, 1 Dec 2023 18:40:39 -0800 Subject: [PATCH 18/23] ext: fixed ext: fixed repeated lines of code This commit further fixes repeated lines of code. Signed-off-by: Kaustav Goswami --- ext/sst/gem5.hh | 1 - 1 file changed, 1 deletion(-) diff --git a/ext/sst/gem5.hh b/ext/sst/gem5.hh index 7f4ea8850f..1941691edd 100644 --- a/ext/sst/gem5.hh +++ b/ext/sst/gem5.hh @@ -120,7 +120,6 @@ class gem5Component: public SST::Component void initPython(int argc, char **argv); void splitCommandArgs(std::string &cmd, std::vector &args); void splitPortNames(std::string port_names); - void splitPortNames(std::string port_names); bool threadInitialized; From 23e680d29fd2063405369e652c481d0de955cc15 Mon Sep 17 00:00:00 2001 From: Kaustav Goswami Date: Tue, 12 Dec 2023 18:56:00 -0800 Subject: [PATCH 19/23] ext: updated the example arm dm script This change makes the remote memory a 4 channel DDR4-like memory installed on the remote node. 
Signed-off-by: Kaustav Goswami --- ext/sst/sst/example_arm_dm_board.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/ext/sst/sst/example_arm_dm_board.py b/ext/sst/sst/example_arm_dm_board.py index 0da906066c..bbbf2e5172 100644 --- a/ext/sst/sst/example_arm_dm_board.py +++ b/ext/sst/sst/example_arm_dm_board.py @@ -77,14 +77,23 @@ def connect_components(link_name: str, # `addr_range_end` should be changed accordingly to memory_size_sst memctrl.addParams({ "debug" : "0", - "clock" : "2.4GHz", + "clock" : "1.2GHz", "request_width" : "64", "addr_range_end" : addr_range_end, }) -memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") +memory = memctrl.setSubComponent( "backend", "memHierarchy.timingDRAM") memory.addParams({ - "access_time" : "50ns", - "mem_size" : sst_memory_size + "id" : 0, + "addrMapper" : "memHierarchy.simpleAddrMapper", + "addrMapper.interleave_size" : "64B", + "addrMapper.row_size" : "1KiB", + "clock" : "1.2GHz", + "mem_size" : sst_memory_size, + "channels" : 4, + "channel.numRanks" : 2, + "channel.rank.numBanks" : 16, + "channel.rank.bank.TRP" : 14, + "printconfig" : 1, }) # Add all the Gem5 nodes to this list. From 8c1003935ba6981affae4495f67025883750c5e5 Mon Sep 17 00:00:00 2001 From: Kaustav Goswami Date: Fri, 19 Jan 2024 13:07:07 -0800 Subject: [PATCH 20/23] ext: restrict any functional reads to go to sst This change restricts any functional reads at runtime to go to SST. 
Signed-off-by: Kaustav Goswami --- src/sst/outgoing_request_bridge.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/sst/outgoing_request_bridge.cc b/src/sst/outgoing_request_bridge.cc index 0685adad1d..edff92c923 100644 --- a/src/sst/outgoing_request_bridge.cc +++ b/src/sst/outgoing_request_bridge.cc @@ -152,6 +152,11 @@ OutgoingRequestBridge::handleRecvFunctional(PacketPtr pkt) // this error, don not handle any functional accesses and the kernel // boot will fail while reading the correct partition from the vda // device. + + // we cannot allow any functional reads to go to SST + if (pkt->isRead()) { + assert(false && "Outgoing bridge cannot handle functional reads!"); + } sstResponder->handleRecvFunctional(pkt); } } From cabe028ae64b64bfb70d89d79cdde54e0d8bd20c Mon Sep 17 00:00:00 2001 From: Kaustav Goswami Date: Fri, 19 Jan 2024 13:43:33 -0800 Subject: [PATCH 21/23] configs: added a KVM capable ARM NUMA runscript This change adds a KVM capable runscript for testing the NUMA setup. Signed-off-by: Kaustav Goswami --- .../configs/arm-gem5-numa-nodes-w-kvm.py | 149 ++++++++++++++++++ 1 file changed, 149 insertions(+) create mode 100644 disaggregated_memory/configs/arm-gem5-numa-nodes-w-kvm.py diff --git a/disaggregated_memory/configs/arm-gem5-numa-nodes-w-kvm.py b/disaggregated_memory/configs/arm-gem5-numa-nodes-w-kvm.py new file mode 100644 index 0000000000..08c1467845 --- /dev/null +++ b/disaggregated_memory/configs/arm-gem5-numa-nodes-w-kvm.py @@ -0,0 +1,149 @@ +# Copyright (c) 2023-24 The Regents of the University of California +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +This script shows an example of running a full system ARM Ubuntu boot +simulation using the gem5 library. This simulation boots Ubuntu 20.04 using +1 TIMING CPU cores and executes `STREAM`. The simulation ends when the +startup is completed successfully. + +This config works if the host ARM machine supports KVM. The +PR https://github.com/gem5/gem5/pull/725 is needed to be present in the source. 
+ +Limitations: +This only works with VExpress_GEM5_V1 and bootloader-v1 +""" + +import os +import sys + +# all the source files are one directory above. +sys.path.append( + os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir)) +) + +import m5 +from m5.objects import Root + +from boards.arm_gem5_board import ArmGem5DMBoard +from cachehierarchies.dm_caches import ClassicPrivateL1PrivateL2SharedL3DMCache +from memories.remote_memory import RemoteChanneledMemory +from gem5.utils.requires import requires +from gem5.components.memory.dram_interfaces.ddr4 import DDR4_2400_8x8 +from gem5.components.memory import DualChannelDDR4_2400 +from gem5.components.memory.multi_channel import * +from gem5.components.processors.simple_processor import SimpleProcessor +from gem5.components.processors.cpu_types import CPUTypes +from gem5.isas import ISA +from gem5.simulate.simulator import Simulator +from gem5.resources.workload import Workload +from gem5.resources.workload import * +from gem5.resources.resource import * + +# This runs a check to ensure the gem5 binary is compiled for ARM. + +requires(isa_required=ISA.ARM) + +# defining a new type of memory with latency added. This memory interface can +# be used as a remote memory interface to simulate disaggregated memory. +def RemoteDualChannelDDR4_2400( + size: Optional[str] = None, remote_offset_latency=300 +) -> AbstractMemorySystem: + """ + A dual channel memory system using DDR4_2400_8x8 based DIMM + """ + return RemoteChanneledMemory( + DDR4_2400_8x8, + 2, + 64, + size=size, + remote_offset_latency=remote_offset_latency, + ) + +# Here we setup the parameters of the l1 and l2 caches. +cache_hierarchy = ClassicPrivateL1PrivateL2SharedL3DMCache( + l1d_size="32KiB", l1i_size="32KiB", l2_size="256KiB", l3_size="1MiB" +) +# Memory: Dual Channel DDR4 2400 DRAM device. 
+local_memory = DualChannelDDR4_2400(size="1GiB") +# The remote memory can either be a simple Memory Interface, which is from a +# different memory range or it can be a Remote Memory Range, which has an +# inherent delay while performing reads and writes into that memory. For simple +# memory, use any MemInterfaces available in gem5 standard library. For remote +# memory, please refer to the `RemoteDualChannelDDR4_2400` method in this +# config script to extend any existing MemInterface class and add latency value +# to that memory. +remote_memory = RemoteDualChannelDDR4_2400( + size="1GB", remote_offset_latency=750 +) +# Here we setup the processor. We use a simple processor. +processor = SimpleProcessor(cpu_type=CPUTypes.ATOMIC, isa=ISA.ARM, num_cores=1) +# Here we setup the board which allows us to do Full-System ARM simulations. +board = ArmGem5DMBoard( + clk_freq="3GHz", + processor=processor, + local_memory=local_memory, + remote_memory=remote_memory, + cache_hierarchy=cache_hierarchy, +) + +cmd = [ + "mount -t sysfs - /sys;", + "mount -t proc - /proc;", + "numastat;", + "numactl --membind=0 -- " + + "/home/ubuntu/simple-vectorizable-microbenchmarks/stream-annotated/" + + "stream.hw.m5 1000000;", + "numastat;", + "numactl --interleave=0,1 -- " + + "/home/ubuntu/simple-vectorizable-microbenchmarks/stream-annotated/" + + "stream.hw.m5 1000000;", + "numastat;", + "numactl --membind=1 -- " + + "/home/ubuntu/simple-vectorizable-microbenchmarks/stream-annotated/" + + "stream.hw.m5 1000000;", + "numastat;", + "m5 exit;", +] + +board.set_kernel_disk_workload( + # NUMA, SPARSEMEM and HOTPLUG enabled kernel. 
+ kernel=CustomResource("/home/kaustavg/kernel/arm/linux-6.7/vmlinux"), + bootloader=CustomResource( + "/home/kaustavg/.cache/gem5/arm64-bootloader" + ), + disk_image=DiskImageResource( + "/home/kaustavg/disk-images/arm/arm64sve-hpc-2204-20230526-numa.img", + root_partition="1", + ), + readfile_contents=" ".join(cmd), +) +# This script will boot two numa nodes in a full system simulation where the +# gem5 node will be sending instructions to the SST node. The simulation will +# exit after displaying numastat information on the terminal, which can be +# viewed from board.terminal. +simulator = Simulator(board=board) +simulator.run() From 53f8b470ff0147acaa062ac29cbf1e083da728a7 Mon Sep 17 00:00:00 2001 From: Kaustav Goswami Date: Fri, 19 Jan 2024 13:44:57 -0800 Subject: [PATCH 22/23] arch-x86,configs: added a WIP x86 dm board This change adds a WIP working x86 board that allows memory offlining with compatible kernel. Signed-off-by: Kaustav Goswami --- disaggregated_memory/boards/x86_dm_board.py | 152 +++++------ disaggregated_memory/boards/x86_gem5_board.py | 199 +++++++++++++++ disaggregated_memory/boards/x86_sst_board.py | 235 ++++++++++++++++++ .../configs/x86-gem5-numa-nodes.py | 155 ++++++++++++ .../configs/x86-sst-numa-nodes.py | 129 ++++++++++ 5 files changed, 781 insertions(+), 89 deletions(-) create mode 100644 disaggregated_memory/boards/x86_gem5_board.py create mode 100644 disaggregated_memory/boards/x86_sst_board.py create mode 100644 disaggregated_memory/configs/x86-gem5-numa-nodes.py create mode 100644 disaggregated_memory/configs/x86-sst-numa-nodes.py diff --git a/disaggregated_memory/boards/x86_dm_board.py b/disaggregated_memory/boards/x86_dm_board.py index db73096ba9..945ca12391 100644 --- a/disaggregated_memory/boards/x86_dm_board.py +++ b/disaggregated_memory/boards/x86_dm_board.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 The Regents of the University of California +# Copyright (c) 2023-24 The Regents of the University of California # All rights 
reserved. # # Redistribution and use in source and binary forms, with or without @@ -70,8 +70,20 @@ from typing import List, Sequence, Tuple - -class X86DMBoard(X86Board): +class X86AbstractDMBoard(X86Board): + """ + A high-level X86 board that can zNUMA-capable systems with a remote + memories. This board is extended from the ArmBoard from Gem5 standard + library. This board assumes that you will be booting Linux. This board can + be used to do disaggregated ARM system research while accelerating the + simulation using kvm. + + The reason this board was created was to leverage the features X86 ISA has + over ARM and RISCV, e.g. memory hotplug and ACPI driver support in gem5. + + **Limitations** + * kvm is only supported in a gem5-only setup. + """ __metaclass__ = ABCMeta def __init__( @@ -79,25 +91,23 @@ def __init__( clk_freq: str, processor: AbstractProcessor, cache_hierarchy: AbstractCacheHierarchy, - memory: AbstractMemorySystem, - # remote_memory_str: str - # remote_memory: AbstractMemorySystem - remote_memory_size: str, + local_memory: AbstractMemorySystem, + remote_memory_addr_range: AddrRange, ) -> None: - self._localMemory = memory - self._remoteMemorySize = remote_memory_size - self._remoteMemory = OutgoingRequestBridge( - physical_address_ranges=AddrRange(0x40000000, 0x80000000) - ) - print(self._remoteMemory.physical_address_ranges[0]) + # The structure of this board is similar to the RISCV DM board. + self._localMemory = local_memory + # remote_memory can either be an interface or an external memory + # This abstract disaggregated memory does not know what this type of + # memory is. it only needs to know the address range for this memory. + # from this range, we'll figure out the size. 
+ self._remoteMemoryAddrRange = remote_memory_addr_range super().__init__( clk_freq=clk_freq, processor=processor, cache_hierarchy=cache_hierarchy, - memory=memory, + memory=local_memory, ) - self.local_memory = memory - self.remote_memory = self._remoteMemory + self.local_memory = local_memory @overrides(X86Board) def get_memory(self) -> "AbstractMemory": @@ -135,17 +145,20 @@ def get_remote_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]: @overrides(X86Board) def _setup_memory_ranges(self): - # Need to create 2 entries for the memory ranges - # local_memory = self.get_local_memory() - # remote_memory = self.get_local_memory() + # Need to create 2 entries for the memory ranges. + # make the local memory as 3 gb for now. + + self.get_local_memory().set_memory_range([AddrRange(start=0x0, size="1GiB")]) + self.get_remote_memory().set_memory_range([AddrRange(start=0x40000000, size="1GiB")]) + # remote_memory = self.get_remote_memory() # local_mem_size = local_memory.get_size() # remote_mem_size = remote_memory.get_size() - self._local_mem_ranges = [ - "2GiB" - # AddrRange(local_mem_size) - ] + # self._local_mem_ranges = [ + # "2GiB" + # # AddrRange(local_mem_size) + # ] # The remote memory starts where the local memory ends. Therefore it # has to be offset by the local memory's size. @@ -156,9 +169,9 @@ def _setup_memory_ranges(self): # Keep it under 2 GB for this case. Each slice of memory is 1 GB. 
self.mem_ranges = [ - self._local_mem_ranges[0], - # self._remote_mem_ranges[0], - AddrRange(0xC0000000, size=0x100000), # For I/0 + AddrRange(start=0x0, size="1GiB"), + AddrRange(start=0x40000000, size="1GiB"), + # AddrRange(0xC0000000, size=0x100000), # For I/0 ] @overrides(X86Board) @@ -167,12 +180,17 @@ def get_default_kernel_args(self) -> List[str]: "earlyprintk=ttyS0", "console=ttyS0", "lpj=7999923", - "root={root_value}", - "init=/bin/bash", + "root=/dev/sda1", + # "numa=fake=2", + # "movable_node", + # "kernelcore=1G", + # "mem=1G", + "memmap=1G@0x0" + # "init=/bin/bash", ] - # @overrides(X86Board) - def _setup_io_devicess(self): + @overrides(X86Board) + def _setup_io_devices(self): """Sets up the x86 IO devices. Note: This is mostly copy-paste from prior X86 FS setups. Some of it @@ -307,77 +325,33 @@ def assignISAInt(irq, apicPin): X86E820Entry(addr=0, size="639kB", range_type=1), X86E820Entry(addr=0x9FC00, size="385kB", range_type=2), # Mark the rest of physical memory as available - # the local address comes first. X86E820Entry( addr=0x100000, size=f"{self.mem_ranges[0].size() - 0x100000:d}B", range_type=1, - ), + ) + ] + # Reserve the last 16kB of the 32-bit address space for m5ops + entries.append( + X86E820Entry( + addr=0x40000000, + size="%dB" % (self.mem_ranges[0].size()), + range_type=5, + ) + ) + entries.append(X86E820Entry(addr=0xFFFF0000, size="64kB", range_type=2)) # X86E820Entry( # addr=0x100000000, # size=f"{self.mem_ranges[1].size()}B", # range_type=1, # ), - ] # print("____", self.mem_ranges[0].size() + 0x100000) # Reserve the last 16kB of the 32-bit address space for m5ops - entries.append( - X86E820Entry(addr=0xFFFF0000, size="64kB", range_type=2) - ) + # entries.append( + # X86E820Entry(addr=0xFFFF0000, size="64kB", range_type=2) + # ) print(entries) + print() self.workload.e820_table.entries = entries - - @overrides(AbstractBoard) - def _connect_things(self) -> None: - """Connects all the components to the board. 
- - The order of this board is always: - - 1. Connect the memory. - 2. Connect the cache hierarchy. - 3. Connect the processor. - - Developers may build upon this assumption when creating components. - - Notes - ----- - - * The processor is incorporated after the cache hierarchy due to a bug - noted here: https://gem5.atlassian.net/browse/GEM5-1113. Until this - bug is fixed, this ordering must be maintained. - * Once this function is called `_connect_things_called` *must* be set - to `True`. - """ - - if self._connect_things_called: - raise Exception( - "The `_connect_things` function has already been called." - ) - - # Incorporate the memory into the motherboard. - self.get_local_memory().incorporate_memory(self) - print("_", self.get_local_memory().mem_ctrl) - self.get_remote_memory().port = ( - self.get_cache_hierarchy().membus.mem_side_ports - ) - # self.get_remote_memory().incorporate_memory(self) - - # Incorporate the cache hierarchy for the motherboard. - if self.get_cache_hierarchy(): - self.get_cache_hierarchy().incorporate_cache(self) - - # Incorporate the processor into the motherboard. - self.get_processor().incorporate_processor(self) - - self._connect_things_called = True - - @overrides(AbstractBoard) - def _post_instantiate(self): - """Called to set up anything needed after m5.instantiate""" - self.get_processor()._post_instantiate() - if self.get_cache_hierarchy(): - self.get_cache_hierarchy()._post_instantiate() - self.get_local_memory()._post_instantiate() - # self.get_remote_memory()._post_instantiate() diff --git a/disaggregated_memory/boards/x86_gem5_board.py b/disaggregated_memory/boards/x86_gem5_board.py new file mode 100644 index 0000000000..c9fa77ea80 --- /dev/null +++ b/disaggregated_memory/boards/x86_gem5_board.py @@ -0,0 +1,199 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + +from m5.objects import ( + AddrRange, + VoltageDomain, + SrcClockDomain, + Terminal, + VncServer, + IOXBar, + BadAddr, + Port, + Pc, + AddrRange, + X86FsLinux, + Addr, + X86SMBiosBiosInformation, + X86IntelMPProcessor, + X86IntelMPIOAPIC, + X86IntelMPBus, + X86IntelMPBusHierarchy, + X86IntelMPIOIntAssignment, + X86E820Entry, + Bridge, + IOXBar, + IdeDisk, + CowDiskImage, + RawDiskImage, + BaseXBar, + Port, + OutgoingRequestBridge, +) + +import os +import m5 + +from abc import ABCMeta + +from gem5.utils.override import overrides + +from typing import List, Sequence, Tuple + +from memories.remote_memory import RemoteChanneledMemory +from boards.x86_dm_board import X86AbstractDMBoard + +from gem5.components.processors.abstract_processor import AbstractProcessor +from gem5.components.memory.abstract_memory_system import AbstractMemorySystem +from gem5.components.cachehierarchies.abstract_cache_hierarchy import ( + AbstractCacheHierarchy, +) + +class X86Gem5DMBoard(X86AbstractDMBoard): + + __metaclass__ = ABCMeta + + def __init__( + self, + clk_freq: str, + processor: AbstractProcessor, + local_memory: AbstractMemorySystem, + remote_memory: AbstractMemorySystem, + cache_hierarchy: AbstractCacheHierarchy, + remote_memory_addr_range: AddrRange = None + ) -> None: + self._localMemory = local_memory + self._remoteMemory = remote_memory + # If the remote_memory_addr_range is not provided, we'll assume that + # it starts at 0x80000000 + local_memory_size and ends at it's own size + if remote_memory_addr_range is None: + remote_memory_addr_range = AddrRange( + 0x80000000 + self._localMemory.get_size(), + size=remote_memory.get_size(), + ) + super().__init__( + clk_freq=clk_freq, + processor=processor, + local_memory=local_memory, + remote_memory_addr_range=remote_memory_addr_range, + cache_hierarchy=cache_hierarchy, + ) + self.local_memory = local_memory + self.remote_memory = remote_memory + + @overrides(X86AbstractDMBoard) + def get_remote_memory(self) -> 
"AbstractMemory": + """Get the memory (RAM) connected to the board. + :returns: The remote memory system. + """ + return self._remoteMemory + + @overrides(X86AbstractDMBoard) + def get_remote_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]: + return self.get_remote_memory().get_mem_ports() + + # @overrides(X86AbstractDMBoard) + # def _set_remote_memory_ranges(self): + # self.get_remote_memory().set_memory_range( + # [self._remoteMemoryAddrRange] + # ) + + # @overrides(X86AbstractDMBoard) + # def get_default_kernel_args(self) -> List[str]: + + # # The default kernel string is taken from the devices.py file. + # return [ + # "console=ttyAMA0", + # "lpj=19988480", + # "norandmaps", + # "root={root_value}", + # "rw", + # "init=/root/gem5-init.sh", + # "kernelcore=2048M" + # ] + + + @overrides(X86AbstractDMBoard) + def _connect_things(self) -> None: + """Connects all the components to the board. + + The order of this board is always: + + 1. Connect the memory. + 2. Connect the cache hierarchy. + 3. Connect the processor. + + Developers may build upon this assumption when creating components. + + Notes + ----- + + * The processor is incorporated after the cache hierarchy due to a bug + noted here: https://gem5.atlassian.net/browse/GEM5-1113. Until this + bug is fixed, this ordering must be maintained. + * Once this function is called `_connect_things_called` *must* be set + to `True`. + """ + + if self._connect_things_called: + raise Exception( + "The `_connect_things` function has already been called." + ) + + # Incorporate the memory into the motherboard. + self.get_local_memory().incorporate_memory(self) + self.get_remote_memory().incorporate_memory(self) + + + # Incorporate the cache hierarchy for the motherboard. + if self.get_cache_hierarchy(): + self.get_cache_hierarchy().incorporate_cache(self) + # need to connect the remote links to the board. 
+ if self.get_cache_hierarchy().is_ruby(): + fatal( + "remote memory is only supported in classic caches at " + + "the moment!") + if isinstance(self.get_remote_memory(), RemoteChanneledMemory): + for ports in self.get_remote_memory().remote_links: + self.get_cache_hierarchy().membus.mem_side_ports = \ + ports.cpu_side_ports + + # Incorporate the processor into the motherboard. + self.get_processor().incorporate_processor(self) + + self._connect_things_called = True + + @overrides(X86AbstractDMBoard) + def _post_instantiate(self): + """Called to set up anything needed after m5.instantiate""" + self.get_processor()._post_instantiate() + if self.get_cache_hierarchy(): + self.get_cache_hierarchy()._post_instantiate() + self.get_local_memory()._post_instantiate() + self.get_remote_memory()._post_instantiate() + + # print("____", self.remote_memory.mem_ctrl0.dram) diff --git a/disaggregated_memory/boards/x86_sst_board.py b/disaggregated_memory/boards/x86_sst_board.py new file mode 100644 index 0000000000..a32275ad24 --- /dev/null +++ b/disaggregated_memory/boards/x86_sst_board.py @@ -0,0 +1,235 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +from m5.objects import ( + AddrRange, + VoltageDomain, + SrcClockDomain, + Terminal, + VncServer, + IOXBar, + BadAddr, + Port, + Pc, + AddrRange, + X86FsLinux, + Addr, + X86SMBiosBiosInformation, + X86IntelMPProcessor, + X86IntelMPIOAPIC, + X86IntelMPBus, + X86IntelMPBusHierarchy, + X86IntelMPIOIntAssignment, + X86E820Entry, + Bridge, + IOXBar, + IdeDisk, + CowDiskImage, + RawDiskImage, + BaseXBar, + Port, + NoncoherentXBar, + OutgoingRequestBridge, +) + +import os +import m5 + +from abc import ABCMeta + +from gem5.utils.override import overrides + +from typing import List, Sequence, Tuple + +from memories.remote_memory import RemoteChanneledMemory +from boards.x86_dm_board import X86AbstractDMBoard + +from gem5.components.processors.abstract_processor import AbstractProcessor +from gem5.components.memory.abstract_memory_system import AbstractMemorySystem +from gem5.components.cachehierarchies.abstract_cache_hierarchy import ( + AbstractCacheHierarchy, +) + +class X86SstDMBoard(X86AbstractDMBoard): + + __metaclass__ = ABCMeta + + def __init__( + self, + clk_freq: str, + processor: AbstractProcessor, + local_memory: AbstractMemorySystem, + remote_memory: 
"ExternalRemoteMemoryInterface", + cache_hierarchy: AbstractCacheHierarchy, + remote_memory_addr_range: AddrRange = None + ) -> None: + self._localMemory = local_memory + # Since the remote memory is defined in SST's side, we only need the + # size of this memory while setting up stuff from Gem5's side. + self._remoteMemory = remote_memory + # The remote memory is either setup with a size or an address range. + # We need to determine if the address range is set. if not, then we + # need to find the starting and ending of the the external memory + # range. + if not self._remoteMemory.get_set_using_addr_ranges(): + # Address ranges were not set, but the system knows the size + # If the remote_memory_addr_range is not provided, we'll assume + # that it starts at 0x80000000 + local_memory_size and ends at it's + # own size + self._remoteMemory.remote_memory.physical_address_ranges = [ + AddrRange( + 0x80000000 + self._localMemory.get_size(), + size=remote_memory.get_size(), + ) + ] + # We need a size as a string to setup this memory. + self._remoteMemorySize = self._remoteMemory.get_size() + super().__init__( + clk_freq=clk_freq, + processor=processor, + local_memory=local_memory, + remote_memory_addr_range=remote_memory_addr_range, + cache_hierarchy=cache_hierarchy, + ) + self.local_memory = local_memory + self.remote_memory = self._remoteMemory.remote_memory + + @overrides(X86AbstractDMBoard) + def get_remote_memory(self) -> "AbstractMemory": + """Get the memory (RAM) connected to the board. + :returns: The remote memory system. 
+ """ + return self._remoteMemory + + @overrides(X86AbstractDMBoard) + def get_remote_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]: + return [ + ( + self.get_remote_memory().physical_address_ranges, + self.get_remote_memory().port, + ) + ] + + # @overrides(X86AbstractDMBoard) + # def _set_remote_memory_ranges(self): + # self.get_remote_memory().set_memory_range( + # [self._remoteMemoryAddrRange] + # ) + + # @overrides(X86AbstractDMBoard) + # def get_default_kernel_args(self) -> List[str]: + + # # The default kernel string is taken from the devices.py file. + # return [ + # "console=ttyAMA0", + # "lpj=19988480", + # "norandmaps", + # "root={root_value}", + # "rw", + # "init=/root/gem5-init.sh", + # "kernelcore=2048M" + # ] + + + @overrides(X86AbstractDMBoard) + def _connect_things(self) -> None: + """Connects all the components to the board. + + The order of this board is always: + + 1. Connect the memory. + 2. Connect the cache hierarchy. + 3. Connect the processor. + + Developers may build upon this assumption when creating components. + + Notes + ----- + + * The processor is incorporated after the cache hierarchy due to a bug + noted here: https://gem5.atlassian.net/browse/GEM5-1113. Until this + bug is fixed, this ordering must be maintained. + * Once this function is called `_connect_things_called` *must* be set + to `True`. + """ + + if self._connect_things_called: + raise Exception( + "The `_connect_things` function has already been called." + ) + + # Incorporate the memory into the motherboard. + self.get_local_memory().incorporate_memory(self) + # we need to find whether there is any external latency. if yes, then + # add xbar to add this latency. 
+ + if self.get_remote_memory().is_xbar_required(): + self.remote_link = NoncoherentXBar( + frontend_latency=0, + forward_latency=0, + response_latency=self.get_remote_memory()._remote_memory_latency, + width=64, + ) + # connect the remote memory port to the remote link + self.get_remote_memory().remote_memory.port = ( + self.remote_link.mem_side_ports + ) + # The remote link is then connected to the membus + self.get_cache_hierarchy().membus.mem_side_ports = ( + self.remote_link.cpu_side_ports + ) + else: + # Connect the external memory directly to the motherboard. + self.get_remote_memory().remote_memory.port = ( + self.get_cache_hierarchy().membus.mem_side_ports + ) + + # Incorporate the cache hierarchy for the motherboard. + if self.get_cache_hierarchy(): + self.get_cache_hierarchy().incorporate_cache(self) + # need to connect the remote links to the board. + if self.get_cache_hierarchy().is_ruby(): + fatal( + "remote memory is only supported in classic caches at " + + "the moment!") + if isinstance(self.get_remote_memory(), RemoteChanneledMemory): + for ports in self.get_remote_memory().remote_links: + self.get_cache_hierarchy().membus.mem_side_ports = \ + ports.cpu_side_ports + + # Incorporate the processor into the motherboard. + self.get_processor().incorporate_processor(self) + + self._connect_things_called = True + + @overrides(X86AbstractDMBoard) + def _post_instantiate(self): + """Called to set up anything needed after m5.instantiate""" + self.get_processor()._post_instantiate() + if self.get_cache_hierarchy(): + self.get_cache_hierarchy()._post_instantiate() + self.get_local_memory()._post_instantiate() diff --git a/disaggregated_memory/configs/x86-gem5-numa-nodes.py b/disaggregated_memory/configs/x86-gem5-numa-nodes.py new file mode 100644 index 0000000000..2c03f13160 --- /dev/null +++ b/disaggregated_memory/configs/x86-gem5-numa-nodes.py @@ -0,0 +1,155 @@ +# Copyright (c) 2023-24 The Regents of the University of California +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +This script shows an example of running a full system X86 Ubuntu boot +simulation using the gem5 library. This simulation boots Ubuntu 20.04 using +1 KVM CPU core and executes `STREAM`. The simulation ends when the +startup is completed successfully. +""" + +import os +import sys + +# all the source files are one directory above.
+sys.path.append( + os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir)) +) + +import m5 +from m5.objects import Root + +from boards.x86_gem5_board import X86Gem5DMBoard +from cachehierarchies.dm_caches import ClassicPrivateL1PrivateL2DMCache, ClassicPrivateL1PrivateL2SharedL3DMCache +from memories.remote_memory import RemoteChanneledMemory +from gem5.utils.requires import requires +from gem5.components.memory.dram_interfaces.ddr4 import DDR4_2400_8x8 +from gem5.components.memory import SingleChannelDDR4_2400 +from gem5.components.memory.multi_channel import * +from gem5.components.processors.simple_processor import SimpleProcessor +from gem5.components.processors.cpu_types import CPUTypes +from gem5.isas import ISA +from gem5.simulate.simulator import Simulator +from gem5.resources.workload import Workload +from gem5.resources.workload import * +from gem5.resources.resource import * + +# This runs a check to ensure the gem5 binary is compiled for X86. + +requires(isa_required=ISA.X86) + +# defining a new type of memory with latency added. This memory interface can +# be used as a remote memory interface to simulate disaggregated memory. +def RemoteDualChannelDDR4_2400( + size: Optional[str] = None, remote_offset_latency=300 +) -> AbstractMemorySystem: + """ + A dual channel memory system using DDR4_2400_8x8 based DIMM + """ + return RemoteChanneledMemory( + DDR4_2400_8x8, + 1, + 64, + size=size, + remote_offset_latency=remote_offset_latency, + ) + +# Here we setup the parameters of the l1 and l2 caches. +# cache_hierarchy = ClassicPrivateL1PrivateL2DMCache( +# l1d_size="32KiB", l1i_size="32KiB", l2_size="1MB" +# ) +cache_hierarchy = ClassicPrivateL1PrivateL2DMCache( + l1d_size="32KiB", + l1i_size="32KiB", + l2_size="256KiB", +) +# Memory: Single Channel DDR4 2400 DRAM device.
+local_memory = SingleChannelDDR4_2400(size="1GiB") +# The remote memory can either be a simple Memory Interface, which is from a +# different memory range or it can be a Remote Memory Range, which has an +# inherent delay while performing reads and writes into that memory. For simple +# memory, use any MemInterfaces available in gem5 standard library. For remote +# memory, please refer to the `RemoteDualChannelDDR4_2400` method in this +# config script to extend any existing MemInterface class and add latency value +# to that memory. +remote_memory = RemoteDualChannelDDR4_2400( + size="1GB", remote_offset_latency=1050 +) +# Here we setup the processor. We use a simple processor. +processor = SimpleProcessor(cpu_type=CPUTypes.KVM, isa=ISA.X86, num_cores=1) +# Here we setup the board which allows us to do Full-System X86 simulations. +board = X86Gem5DMBoard( + clk_freq="3GHz", + processor=processor, + local_memory=local_memory, + remote_memory=remote_memory, + cache_hierarchy=cache_hierarchy, +) +cmd = [ + "mount -t sysfs - /sys;", + "mount -t proc - /proc;", + "bin/bash" +] + +# "numastat;", +# "m5 dumpresetstats 0 ;", +# # "numactl --preferred=0 -- " + +# "/home/ubuntu/simple-vectorizable-microbenchmarks/stream/stream.hw " + +# "1000000;", +# "numastat;", +# "m5 dumpresetstats 0;", +# "numactl --interleave=0,1 -- " + +# "/home/ubuntu/simple-vectorizable-microbenchmarks/stream/stream.hw " + +# "1000000;", +# "numastat;", +# "m5 dumpresetstats 0;", +# "numactl --membind=1 -- " + +# "/home/ubuntu/simple-vectorizable-microbenchmarks/stream/stream.hw " + +# "1000000;", +# "numastat;", +# "m5 dumpresetstats 0;", +# "m5 exit;", +# ] +board.set_kernel_disk_workload( + # kernel=CustomResource("/home/kaustavg/vmlinux-5.4.49-NUMA.arm64"), + # kernel=CustomResource("/home/kaustavg/vmlinux-5.4.49/vmlinux"), + kernel=CustomResource("/home/kaustavg/kernel/x86/linux-6.7/vmlinux"), + # bootloader=CustomResource( + # "/home/kaustavg/.cache/gem5/x86-npb" + # ), +
disk_image=DiskImageResource( + "/home/kaustavg/.cache/gem5/x86-ubuntu-img", + root_partition="1", + ), + readfile_contents=" ".join(cmd), +) +# This script will boot two numa nodes in a full system simulation where the +# gem5 node will be sending instructions to the SST node. the simulation will +# after displaying numastat information on the terminal, which can be viewed +# from board.terminal. +simulator = Simulator(board=board) +simulator.run() +simulator.run() diff --git a/disaggregated_memory/configs/x86-sst-numa-nodes.py b/disaggregated_memory/configs/x86-sst-numa-nodes.py new file mode 100644 index 0000000000..e8d80ba434 --- /dev/null +++ b/disaggregated_memory/configs/x86-sst-numa-nodes.py @@ -0,0 +1,129 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +This script shows an example of running a full system X86 Ubuntu boot +simulation using the gem5 library. This simulation boots Ubuntu 20.04 using +1 ATOMIC CPU core and executes `STREAM`. The simulation ends when the +startup is completed successfully. + +* This script has to be executed from SST +""" + +import os +import sys + +# all the source files are one directory above. +sys.path.append( + os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir)) +) + +import m5 +from m5.objects import Root, AddrRange + +from boards.x86_sst_board import X86SstDMBoard +from cachehierarchies.dm_caches_sst import ClassicPrivateL1PrivateL2SstDMCache +from memories.external_remote_memory import ExternalRemoteMemoryInterface +from gem5.utils.requires import requires +from gem5.components.memory import DualChannelDDR4_2400, SingleChannelDDR4_2400 +from gem5.components.processors.simple_processor import SimpleProcessor +from gem5.components.processors.cpu_types import CPUTypes +from gem5.isas import ISA +from gem5.simulate.simulator import Simulator +from gem5.resources.workload import Workload +from gem5.resources.workload import * +from gem5.resources.resource import * + +# This runs a check to ensure the gem5 binary is compiled for X86. +requires(isa_required=ISA.X86) +# Here we setup the parameters of the l1 and l2 caches.
+cache_hierarchy = ClassicPrivateL1PrivateL2SstDMCache( + l1d_size="32KiB", l1i_size="32KiB", l2_size="1MB" +) +# Memory: Single Channel DDR4 2400 DRAM device. +local_memory = SingleChannelDDR4_2400(size="1GiB") +# Either supply the size of the remote memory or the address range of the +# remote memory. Since this is inside the external memory, it does not matter +# what type of memory is being simulated. This can either be initialized with +# a size or a memory address range, which is more flexible. Adding remote +# memory latency automatically adds a non-coherent crossbar to simulate latency +remote_memory = ExternalRemoteMemoryInterface( + addr_range=AddrRange(0x40000000, size="1GiB"), remote_memory_latency=0 +) +# Here we setup the processor. We use a simple processor. +processor = SimpleProcessor( + cpu_type=CPUTypes.ATOMIC, isa=ISA.X86, num_cores=1 +) +# Here we setup the board which allows us to do Full-System X86 simulations. +board = X86SstDMBoard( + clk_freq="1GHz", + processor=processor, + local_memory=local_memory, + remote_memory=remote_memory, + cache_hierarchy=cache_hierarchy, +) + +cmd = [ + "mount -t sysfs - /sys;", + "mount -t proc - /proc;", + "numastat;", + "m5 dumpresetstats 0 ;", + "numactl --cpubind=0 --membind=0 -- " + + "/home/ubuntu/simple-vectorizable-microbenchmarks/stream/stream.hw " + + "1000000;", + "m5 dumpresetstats 0;", + "numactl --cpubind=0 --membind=0,1 -- " + + "/home/ubuntu/simple-vectorizable-microbenchmarks/stream/stream.hw " + + "1000000;", + "m5 dumpresetstats 0;", + "numactl --cpubind=0 --membind=1 -- " + + "/home/ubuntu/simple-vectorizable-microbenchmarks/stream/stream.hw " + + "1000000;", + "m5 dumpresetstats 0;", + "m5 exit;", +] + +board.set_kernel_disk_workload( + # kernel=CustomResource("/home/kaustavg/vmlinux-5.4.49-NUMA.arm64"), + # kernel=CustomResource("/home/kaustavg/vmlinux-5.4.49/vmlinux"), + kernel=CustomResource("/home/kaustavg/kernel/x86/linux-6.7/vmlinux"), + # bootloader=CustomResource( + #
"/home/kaustavg/.cache/gem5/x86-npb" + # ), + disk_image=DiskImageResource( + "/home/kaustavg/.cache/gem5/x86-ubuntu-img", + root_partition="1", + ), + readfile_contents=" ".join(cmd), +) +# This script will boot two numa nodes in a full system simulation where the +# gem5 node will be sending instructions to the SST node. The simulation will +# exit after displaying numastat information on the terminal, which can be +# viewed from board.terminal. +board._pre_instantiate() +root = Root(full_system=True, board=board) +board._post_instantiate() +m5.instantiate() From 8ec32299f30279eaa3fb8a93c5c6c0502d3005a6 Mon Sep 17 00:00:00 2001 From: Kaustav Goswami Date: Fri, 19 Jan 2024 23:04:21 +0000 Subject: [PATCH 23/23] configs: added a arm kvm config for dm Signed-off-by: Kaustav Goswami --- disaggregated_memory/configs/arm-gem5-numa-nodes-w-kvm.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/disaggregated_memory/configs/arm-gem5-numa-nodes-w-kvm.py b/disaggregated_memory/configs/arm-gem5-numa-nodes-w-kvm.py index 08c1467845..ed796214ab 100644 --- a/disaggregated_memory/configs/arm-gem5-numa-nodes-w-kvm.py +++ b/disaggregated_memory/configs/arm-gem5-numa-nodes-w-kvm.py @@ -46,7 +46,7 @@ ) import m5 -from m5.objects import Root +from m5.objects import Root, VExpress_GEM5_V1, ArmDefaultRelease from boards.arm_gem5_board import ArmGem5DMBoard from cachehierarchies.dm_caches import ClassicPrivateL1PrivateL2SharedL3DMCache @@ -100,7 +100,9 @@ def RemoteDualChannelDDR4_2400( size="1GB", remote_offset_latency=750 ) # Here we setup the processor. We use a simple processor. -processor = SimpleProcessor(cpu_type=CPUTypes.ATOMIC, isa=ISA.ARM, num_cores=1) +processor = SimpleProcessor(cpu_type=CPUTypes.KVM, isa=ISA.ARM, num_cores=1) +release = ArmDefaultRelease() +platform = VExpress_GEM5_V1() # Here we setup the board which allows us to do Full-System ARM simulations.
board = ArmGem5DMBoard( clk_freq="3GHz", @@ -108,6 +110,8 @@ def RemoteDualChannelDDR4_2400( local_memory=local_memory, remote_memory=remote_memory, cache_hierarchy=cache_hierarchy, + release=release, + platform=platform ) cmd = [