diff --git a/LICENSE.md b/LICENSE.md index f3707810..a6de0500 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,7 +1,11 @@ ************** Copyright © 2019, UChicago Argonne, LLC *************** + All Rights Reserved + Software Name: CO-Design of Exascale Storage and Network Architectures (CODES) + By: Argonne National Laboratory, Rensselaer Polytechnic Institute, Lawrence Livermore National Laboratory, and Illinois Institute of Technology + OPEN SOURCE LICENSE Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: diff --git a/Makefile.am b/Makefile.am index 62af4ca8..28157735 100644 --- a/Makefile.am +++ b/Makefile.am @@ -22,7 +22,7 @@ pkgconfig_DATA = maint/codes.pc $(pkgconfig_DATA): config.status EXTRA_DIST += \ - prepare.sh COPYRIGHT configure.ac uc-codes.cfg reformat.sh \ + prepare.sh LICENSE.md configure.ac uc-codes.cfg reformat.sh \ misc/README misc/ptrn_loggp-2.4.6.patch CONTRIBUTORS.md \ README.md diff --git a/README.md b/README.md index 1ef7fc51..8698835d 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,9 @@ # CODES Discrete-event Simulation Framework -https://github.com/codes-org/codes/wiki +### New? Check out the Wiki for Installation, Tutorials, and Documentation: https://github.com/codes-org/codes/wiki Discrete event driven simulation of HPC system architectures and subsystems has emerged as a productive and cost-effective means to evaluating potential HPC designs, along with capabilities for executing simulations of extreme scale systems. The goal of the CODES project is to use highly parallel simulation to explore the design of exascale storage/network architectures and distributed data-intensive science facilities. Our simulations build upon the Rensselaer Optimistic Simulation System (ROSS), a discrete event simulation framework that allows simulations to be run in parallel, decreasing the simulation run time of massive simulations to hours. 
We are using ROSS to explore topics including large-scale storage systems, I/O workloads, HPC network fabrics, distributed science systems, and data-intensive computation environments. The CODES project is a collaboration between the Mathematics and Computer Science department at Argonne National Laboratory and Rensselaer Polytechnic Institute. We collaborate with researchers at University of California at Davis to come up with novel methods for analysis and visualizations of large-scale event driven simulations. We also collaborate with Lawrence Livermore National Laboratory for modeling HPC interconnect systems. - -Documentation can be found in the wiki: -https://github.com/codes-org/codes/wiki diff --git a/src/networks/model-net/model-net-sched-impl.h b/codes/model-net-sched-impl.h similarity index 100% rename from src/networks/model-net/model-net-sched-impl.h rename to codes/model-net-sched-impl.h diff --git a/configure.ac b/configure.ac index 3c1869c9..a92dc150 100755 --- a/configure.ac +++ b/configure.ac @@ -2,13 +2,9 @@ # Process this file with autoconf to produce a configure script. 
AC_PREREQ([2.67]) -AC_INIT([codes], [1.1.0], [http://trac.mcs.anl.gov/projects/codes/newticket],[],[http://www.mcs.anl.gov/projects/codes/]) +AC_INIT([codes], [1.1.1], [http://trac.mcs.anl.gov/projects/codes/newticket],[],[http://www.mcs.anl.gov/projects/codes/]) LT_INIT - -#WRAP SOME OPTION AROUND THIS - IT'S NOT REQUIRED FOR 99% OF CODES -AX_BOOST_BASE([1.66]) - AC_CANONICAL_TARGET AC_CANONICAL_SYSTEM AC_CANONICAL_HOST @@ -20,8 +16,8 @@ m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) AC_CONFIG_SRCDIR([doc/BUILD_STEPS]) AC_CONFIG_HEADERS([codes_config.h]) -AX_PROG_BISON([],[AC_MSG_WARN([Could not find bison])]) -AX_PROG_FLEX([],[AC_MSG_WARN([Could not find flex])]) +AX_PROG_BISON([],[AC_MSG_ERROR([could not find required package bison])]) +AX_PROG_FLEX([],[AC_MSG_ERROR([could not find required package flex])]) AC_SUBST([BISON]) AC_SUBST([FLEX]) @@ -124,6 +120,7 @@ AC_ARG_WITH([online],[AS_HELP_STRING([--with-online@<:@=DIR@:>@], [use_online=yes],[use_online=no]) if test "x${use_online}" != "xno" ; then AM_CONDITIONAL(USE_ONLINE, true) + AX_BOOST_BASE([1.66]) AX_CXX_COMPILE_STDCXX(11, noext, mandatory) PKG_CHECK_MODULES_STATIC([ARGOBOTS], [argobots], [], [AC_MSG_ERROR([Could not find working argobots installation via pkg-config])]) @@ -232,5 +229,6 @@ AC_OUTPUT([maint/codes.pc]) AC_OUTPUT([src/network-workloads/conf/dragonfly-custom/modelnet-test-dragonfly-1728-nodes.conf]) AC_OUTPUT([src/network-workloads/conf/dragonfly-plus/modelnet-test-dragonfly-plus.conf]) AC_OUTPUT([src/network-workloads/conf/dragonfly-dally/modelnet-test-dragonfly-dally.conf]) +AC_OUTPUT([doc/example/tutorial-ping-pong.conf]) diff --git a/doc/Makefile.subdir b/doc/Makefile.subdir index 7c8b3058..76c86d5e 100644 --- a/doc/Makefile.subdir +++ b/doc/Makefile.subdir @@ -2,8 +2,11 @@ noinst_PROGRAMS += \ doc/example/example \ doc/example_heterogeneous/example +bin_PROGRAMS += doc/example/tutorial-synthetic-ping-pong + doc_example_example_SOURCES = doc/example/example.c 
doc_example_heterogeneous_example_SOURCES = doc/example_heterogeneous/example.c +doc_example_tutorial_synthetic_ping_pong_SOURCES = doc/example/tutorial-synthetic-ping-pong.c EXTRA_DIST += \ doc/BUILD_STEPS \ @@ -15,6 +18,7 @@ EXTRA_DIST += \ doc/codes-best-practices.tex \ doc/Makefile \ doc/example/example.conf \ + doc/example/tutorial-ping-pong.conf \ doc/example_heterogeneous/example.conf \ doc/example_heterogeneous/example_torus.conf \ doc/example_heterogeneous/README \ diff --git a/codes-vis-readme.md b/doc/codes-vis-readme.md similarity index 100% rename from codes-vis-readme.md rename to doc/codes-vis-readme.md diff --git a/doc/example/tutorial-ping-pong.conf b/doc/example/tutorial-ping-pong.conf new file mode 100644 index 00000000..ada5ccbd --- /dev/null +++ b/doc/example/tutorial-ping-pong.conf @@ -0,0 +1,53 @@ +LPGROUPS +{ + MODELNET_GRP + { + repetitions="36"; +# name of this lp changes according to the model + nw-lp="2"; +# these lp names will be the same for dragonfly-custom model + modelnet_dragonfly_dally="2"; + modelnet_dragonfly_dally_router="1"; + } +} +PARAMS +{ +# packet size in the network + packet_size="4096"; + modelnet_order=( "dragonfly_dally","dragonfly_dally_router" ); + # scheduler options + modelnet_scheduler="fcfs"; +# chunk size in the network (when chunk size = packet size, packets will not be +# divided into chunks) + chunk_size="4096"; +# modelnet_scheduler="round-robin"; +# number of routers in group + num_routers="4"; +# number of groups in the network + num_groups="9"; +# buffer size in bytes for local virtual channels + local_vc_size="16384"; +#buffer size in bytes for global virtual channels + global_vc_size="16384"; +#buffer size in bytes for compute node virtual channels + cn_vc_size="32768"; +#bandwidth in GiB/s for local channels + local_bandwidth="2.0"; +# bandwidth in GiB/s for global channels + global_bandwidth="2.0"; +# bandwidth in GiB/s for compute node-router channels + cn_bandwidth="2.0"; +# ROSS message size + 
message_size="736"; +# number of compute nodes connected to router, dictated by dragonfly config +# file + num_cns_per_router="2"; +# number of global channels per router + num_global_channels="2"; +# network config file for intra-group connections + intra-group-connections="../src/network-workloads/conf/dragonfly-dally/dfdally-72-intra"; +# network config file for inter-group connections + inter-group-connections="../src/network-workloads/conf/dragonfly-dally/dfdally-72-inter"; +# routing protocol to be used + routing="prog-adaptive"; +} diff --git a/doc/example/tutorial-ping-pong.conf.in b/doc/example/tutorial-ping-pong.conf.in new file mode 100644 index 00000000..d8757459 --- /dev/null +++ b/doc/example/tutorial-ping-pong.conf.in @@ -0,0 +1,53 @@ +LPGROUPS +{ + MODELNET_GRP + { + repetitions="36"; +# name of this lp changes according to the model + nw-lp="2"; +# these lp names will be the same for dragonfly-custom model + modelnet_dragonfly_dally="2"; + modelnet_dragonfly_dally_router="1"; + } +} +PARAMS +{ +# packet size in the network + packet_size="4096"; + modelnet_order=( "dragonfly_dally","dragonfly_dally_router" ); + # scheduler options + modelnet_scheduler="fcfs"; +# chunk size in the network (when chunk size = packet size, packets will not be +# divided into chunks) + chunk_size="4096"; +# modelnet_scheduler="round-robin"; +# number of routers in group + num_routers="4"; +# number of groups in the network + num_groups="9"; +# buffer size in bytes for local virtual channels + local_vc_size="16384"; +#buffer size in bytes for global virtual channels + global_vc_size="16384"; +#buffer size in bytes for compute node virtual channels + cn_vc_size="32768"; +#bandwidth in GiB/s for local channels + local_bandwidth="2.0"; +# bandwidth in GiB/s for global channels + global_bandwidth="2.0"; +# bandwidth in GiB/s for compute node-router channels + cn_bandwidth="2.0"; +# ROSS message size + message_size="736"; +# number of compute nodes connected to router, 
dictated by dragonfly config +# file + num_cns_per_router="2"; +# number of global channels per router + num_global_channels="2"; +# network config file for intra-group connections + intra-group-connections="@abs_srcdir@/../../src/network-workloads/conf/dragonfly-dally/dfdally-72-intra"; +# network config file for inter-group connections + inter-group-connections="@abs_srcdir@/../../src/network-workloads/conf/dragonfly-dally/dfdally-72-inter"; +# routing protocol to be used + routing="prog-adaptive"; +} diff --git a/doc/example/tutorial-synthetic-ping-pong.c b/doc/example/tutorial-synthetic-ping-pong.c new file mode 100644 index 00000000..7f219aa3 --- /dev/null +++ b/doc/example/tutorial-synthetic-ping-pong.c @@ -0,0 +1,342 @@ +/* + * Copyright (C) 2019 Neil McGlohon + * See LICENSE notice in top-level directory + */ + +#include "codes/model-net.h" +#include "codes/lp-io.h" +#include "codes/codes.h" +#include "codes/codes_mapping.h" +#include "codes/configuration.h" +#include "codes/lp-type-lookup.h" + + +static int net_id = 0; /* model-net network ID; main() sets it from model_net_configure() */ +static int PAYLOAD_SZ = 4096; /* size passed to model_net_event() for every PING/PONG; set by the payload_sz option */ +static unsigned long long num_nodes = 0; /* number of nw-lp server LPs; main() sets it from codes_mapping_get_lp_count() */ + +static char lp_io_dir[256] = {'\0'}; +static lp_io_handle io_handle; +static unsigned int lp_io_use_suffix = 0; +static int do_lp_io = 0; + +static int num_msgs = 20; /* PINGs a server sends before it stops replying to PONGs; set by the num_messages option */ + +typedef struct svr_msg svr_msg; +typedef struct svr_state svr_state; + +/* global variables for codes mapping */ +static char group_name[MAX_NAME_LENGTH]; +static char lp_type_name[MAX_NAME_LENGTH]; +static int group_index, lp_type_index, rep_id, offset; + +/* type of events */ +enum svr_event +{ + KICKOFF = 1, /* self-scheduled by svr_init to start each server */ + PING, /* the receiver answers with a PONG */ + PONG /* the receiver answers with another PING until num_msgs PINGs were sent */ +}; + +struct svr_msg +{ + enum svr_event svr_event_type; //KICKOFF, PING, or PONG + int sender_id; //ID of the sender workload LP to know who to send a PONG message back to + int payload_value; //Some value that we will encode as an example + model_net_event_return event_rc; //helper to encode data relating to CODES rng usage +}; + +struct svr_state +{ + int svr_id; /* the ID 
of this server */ + int ping_msg_sent_count; /* PING messages sent */ + int ping_msg_recvd_count; /* PING messages received */ + int pong_msg_sent_count; /* PONG messages sent */ + int pong_msg_recvd_count; /* PONG messages received */ + tw_stime start_ts; /* time that this LP started sending requests */ + tw_stime end_ts; /* time that this LP ended sending requests */ + int payload_sum; /* the running sum of all payloads received */ +}; + +/* declaration of functions */ +static void svr_init(svr_state * s, tw_lp * lp); +static void svr_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp); +static void svr_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp); +static void svr_finalize(svr_state * s, tw_lp * lp); +static tw_stime ns_to_s(tw_stime ns); +static tw_stime s_to_ns(tw_stime s); + +/* ROSS lptype function callback mapping */ +tw_lptype svr_lp = { + (init_f) svr_init, + (pre_run_f) NULL, + (event_f) svr_event, + (revent_f) svr_rev_event, + (commit_f) NULL, + (final_f) svr_finalize, + (map_f) codes_mapping, + sizeof(svr_state), +}; + +/* command-line options registered with ROSS through tw_opt_add() in main() */ +const tw_optdef app_opt [] = +{ + TWOPT_GROUP("Model net synthetic traffic " ), + TWOPT_UINT("num_messages", num_msgs, "Number of PING messages to be generated per terminal "), + TWOPT_UINT("payload_sz",PAYLOAD_SZ, "size of the message being sent "), + TWOPT_CHAR("lp-io-dir", lp_io_dir, "Where to place io output (unspecified -> no output)"), + TWOPT_UINT("lp-io-use-suffix", lp_io_use_suffix, "Whether to append uniq suffix to lp-io directory (default 0)"), + TWOPT_END() +}; + +/* returns the callback table used for every server LP */ +const tw_lptype* svr_get_lp_type(void) +{ + return(&svr_lp); +} + +/* registers the server callbacks under the "nw-lp" name used in the config file */ +static void svr_add_lp_type(void) +{ + lp_type_register("nw-lp", svr_get_lp_type()); +} + +static void svr_init(svr_state * s, tw_lp * lp) +{ + //Initialize State + s->ping_msg_sent_count = 0; + s->ping_msg_recvd_count = 0; + s->pong_msg_sent_count = 0; + s->pong_msg_recvd_count = 0; + s->start_ts = 0.0; + s->end_ts = 0.0; + s->svr_id = codes_mapping_get_lp_relative_id(lp->gid, 0, 0); /* 
turns the LP Global ID into the server ID */ + s->payload_sum = 0; + + //Now we create and send a self KICKOFF message - this is a PDES coordination event and thus doesn't need to be injected into the connected network + //so we won't use model_net_event(), that's reserved for stuff we want to send across the network + + /* Set a time from now when this message is to be received by the recipient (self in this case). Add some tiny random noise to help avoid event ties (different events with same timestamp) */ + //the lookahead value is a value required for conservative mode execution to work, it prevents scheduling a new event within the lookahead window + tw_stime kickoff_time = g_tw_lookahead + (tw_rand_unif(lp->rng) * .0001); + + tw_event *e; + svr_msg *m; + e = tw_event_new(lp->gid, kickoff_time, lp); //ROSS method to create a new event + m = tw_event_data(e); //Gives you a pointer to the data encoded within event e + m->svr_event_type = KICKOFF; //Set the event type so we can know how to classify the event when received + tw_event_send(e); //ROSS method to send off the event e with the encoded data in m +} + +static void handle_kickoff_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp) +{ + s->start_ts = tw_now(lp); //the time when we're starting this LP's work is NOW + + svr_msg * ping_msg = malloc(sizeof(svr_msg)); //allocate memory for new message + + tw_lpid local_dest = -1; //ID of a server, relative to only servers + tw_lpid global_dest = -1; //ID of a server LP relative to ALL LPs + + //We want to make sure we're not accidentally picking ourselves: draw an offset in [1,num_nodes-1] so that every OTHER server can be chosen (this needs at least 2 servers) + local_dest = tw_rand_integer(lp->rng, 1, num_nodes - 1); + local_dest = (s->svr_id + local_dest) % num_nodes; + //local_dest is now a number [0,num_nodes) but is assuredly not s->svr_id + assert(local_dest >= 0); + assert(local_dest < num_nodes); + assert(local_dest != s->svr_id); + + ping_msg->sender_id = s->svr_id; //encode our server ID into the new ping message + ping_msg->svr_event_type = PING; //set 
it to type PING + ping_msg->payload_value = tw_rand_integer(lp->rng, 1, 10); //encode a random payload value to it from [1,10] + + codes_mapping_get_lp_info(lp->gid, group_name, &group_index, lp_type_name, &lp_type_index, NULL, &rep_id, &offset); //gets information from CODES necessary to get the global LP ID of a server + global_dest = codes_mapping_get_lpid_from_relative(local_dest, group_name, lp_type_name, NULL, 0); + s->ping_msg_sent_count++; + m->event_rc = model_net_event(net_id, "test", global_dest, PAYLOAD_SZ, 0.0, sizeof(svr_msg), (const void*)ping_msg, 0, NULL, lp); free(ping_msg); //model_net_event copies the message into its own event, so release our buffer to avoid leaking it +} + +static void handle_kickoff_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp) +{ + tw_rand_reverse_unif(lp->rng); //reverse the rng call for getting a local_dest + tw_rand_reverse_unif(lp->rng); //reverse the rng call for creating a payload value + + s->ping_msg_sent_count--; //undo the increment of the ping_msg_sent_count in the server state + model_net_event_rc2(lp, &m->event_rc); //undo any model_net_event calls encoded into this message +} + +static void handle_ping_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp) +{ + s->ping_msg_recvd_count++; //increment the counter for ping messages received + + int original_sender = m->sender_id; //this is the server we need to send a PONG message back to + s->payload_sum += m->payload_value; //increment our running sum of payload values received + + svr_msg * pong_msg = malloc(sizeof(svr_msg)); //allocate memory for new message + pong_msg->sender_id = s->svr_id; + pong_msg->svr_event_type = PONG; + // only ping messages contain a payload value - not every value in a message struct must be utilized by all messages! 
+ + codes_mapping_get_lp_info(lp->gid, group_name, &group_index, lp_type_name, &lp_type_index, NULL, &rep_id, &offset); //gets information from CODES necessary to get the global LP ID of a server + tw_lpid global_dest = codes_mapping_get_lpid_from_relative(original_sender, group_name, lp_type_name, NULL, 0); + s->pong_msg_sent_count++; + m->event_rc = model_net_event(net_id, "test", global_dest, PAYLOAD_SZ, 0.0, sizeof(svr_msg), (const void*)pong_msg, 0, NULL, lp); free(pong_msg); //model_net_event copies the message into its own event, so release our buffer to avoid leaking it +} + +static void handle_ping_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp) +{ + s->ping_msg_recvd_count--; //undo the increment of the counter for ping messages received + s->payload_sum -= m->payload_value; //undo the increment of the payload sum + s->pong_msg_sent_count--; //undo the increment of the counter for pong messages sent, otherwise a rolled back PING is counted twice + model_net_event_rc2(lp, &m->event_rc); //undo any model_net_event calls encoded into this message +} + +static void handle_pong_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp) +{ + s->pong_msg_recvd_count++; //increment the counter for pong messages received + + if(s->ping_msg_sent_count >= num_msgs) //if we've sent enough ping messages, then we stop and don't send any more + { + b->c1 = 1; //flag that we didn't really do anything in this event so that if this event gets reversed, we don't over-aggressively revert state or RNGs + return; + } + + //Now we need to send another ping message back to the sender of the pong + int pong_sender = m->sender_id; //this is the sender of the PONG message that we want to send another PING message to + + svr_msg * ping_msg = malloc(sizeof(svr_msg)); //allocate memory for new message + ping_msg->sender_id = s->svr_id; //encode our server ID into the new ping message + ping_msg->svr_event_type = PING; //set it to type PING + ping_msg->payload_value = tw_rand_integer(lp->rng, 1, 10); //encode a random payload value to it + + codes_mapping_get_lp_info(lp->gid, group_name, &group_index, lp_type_name, &lp_type_index, NULL, &rep_id, &offset); //gets information from CODES necessary to get the global LP 
ID of a server + tw_lpid global_dest = codes_mapping_get_lpid_from_relative(pong_sender, group_name, lp_type_name, NULL, 0); + s->ping_msg_sent_count++; + m->event_rc = model_net_event(net_id, "test", global_dest, PAYLOAD_SZ, 0.0, sizeof(svr_msg), (const void*)ping_msg, 0, NULL, lp); free(ping_msg); //model_net_event copies the message into its own event, so release our buffer to avoid leaking it +} + +static void handle_pong_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp) +{ + s->pong_msg_recvd_count--; //undo the increment of the counter for pong messages received + + if (b->c1) //if we flipped the c1 flag in the forward event + return; //then we don't need to undo any rngs or state change + + tw_rand_reverse_unif(lp->rng); //undo the rng for the new payload value + s->ping_msg_sent_count--; //undo the increment of the counter for ping messages sent + model_net_event_rc2(lp, &m->event_rc); //undo any model_net_event calls encoded into this message +} + +static void svr_finalize(svr_state * s, tw_lp * lp) +{ + s->end_ts = tw_now(lp); + + int total_msgs_sent = s->ping_msg_sent_count + s->pong_msg_sent_count; + int total_msg_size_sent = PAYLOAD_SZ * total_msgs_sent; + tw_stime time_in_seconds_sent = ns_to_s(s->end_ts - s->start_ts); + + printf("Server LPID:%llu svr_id:%d sent %d bytes in %f seconds, PINGs Sent: %d; PONGs Received: %d; PINGs Received: %d; PONGs Sent: %d; Payload Sum: %d\n", (unsigned long long)lp->gid, s->svr_id, total_msg_size_sent, + time_in_seconds_sent, s->ping_msg_sent_count, s->pong_msg_recvd_count, s->ping_msg_recvd_count, s->pong_msg_sent_count, s->payload_sum); +} + +static void svr_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp) +{ + switch (m->svr_event_type) + { + case KICKOFF: + handle_kickoff_event(s, b, m, lp); + break; + case PING: + handle_ping_event(s, b, m, lp); + break; + case PONG: + handle_pong_event(s, b, m, lp); + break; + default: + tw_error(TW_LOC, "\n Invalid message type %d ", m->svr_event_type); + break; + } +} + +static void svr_rev_event(svr_state * s, tw_bf * b, svr_msg * m, tw_lp * lp) +{ + switch (m->svr_event_type) + { + case KICKOFF: + handle_kickoff_rev_event(s, b, 
m, lp); + break; + case PING: + handle_ping_rev_event(s, b, m, lp); + break; + case PONG: + handle_pong_rev_event(s, b, m, lp); + break; + default: + tw_error(TW_LOC, "\n Invalid message type %d ", m->svr_event_type); + break; + } +} + +/* convert ns to seconds */ +static tw_stime ns_to_s(tw_stime ns) +{ + return(ns / (1000.0 * 1000.0 * 1000.0)); +} +/* convert seconds to ns */ +static tw_stime s_to_ns(tw_stime s) +{ + return(s*1000.0*1000.0*1000.0); +} + +int main(int argc, char **argv) +{ + int nprocs; + int rank; + int num_nets; + int *net_ids; + + tw_opt_add(app_opt); + tw_init(&argc, &argv); + + codes_comm_update(); + + if(argc < 3) //the config file path is read from argv[2] below, so at least three arguments must remain + { + printf("\n Usage: mpirun <args> --sync=1/2/3 -- <config_file.conf>\n"); + MPI_Finalize(); + return 0; + } + + MPI_Comm_rank(MPI_COMM_CODES, &rank); + MPI_Comm_size(MPI_COMM_CODES, &nprocs); + + configuration_load(argv[2], MPI_COMM_CODES, &config); + + model_net_register(); + svr_add_lp_type(); + + codes_mapping_setup(); + + net_ids = model_net_configure(&num_nets); + net_id = *net_ids; + free(net_ids); + + /* 1 day of simulation time is drastically huge but it will ensure + that the simulation doesn't try to end before all packets are delivered */ + g_tw_ts_end = s_to_ns(24 * 60 * 60); + + num_nodes = codes_mapping_get_lp_count("MODELNET_GRP", 0, "nw-lp", NULL, 1); //get the number of nodes so we can use this value during the simulation + assert(num_nodes); + + if(lp_io_dir[0]) + { + do_lp_io = 1; + int flags = lp_io_use_suffix ? 
LP_IO_UNIQ_SUFFIX : 0; + int ret = lp_io_prepare(lp_io_dir, flags, &io_handle, MPI_COMM_CODES); + assert(ret == 0 || !"lp_io_prepare failure"); + } + tw_run(); + if (do_lp_io){ + int ret = lp_io_flush(io_handle, MPI_COMM_CODES); + assert(ret == 0 || !"lp_io_flush failure"); + } + model_net_report_stats(net_id); + + tw_end(); + return 0; +} \ No newline at end of file diff --git a/reformat.sh b/misc/reformat.sh similarity index 100% rename from reformat.sh rename to misc/reformat.sh diff --git a/uc-codes.cfg b/misc/uc-codes.cfg similarity index 100% rename from uc-codes.cfg rename to misc/uc-codes.cfg diff --git a/src/Makefile.subdir b/src/Makefile.subdir index e61bb307..7a7d1a6d 100644 --- a/src/Makefile.subdir +++ b/src/Makefile.subdir @@ -90,6 +90,7 @@ nobase_include_HEADERS = \ codes/model-net-method.h \ codes/model-net-lp.h \ codes/model-net-sched.h \ + codes/model-net-sched-impl.h \ codes/model-net-inspect.h \ codes/connection-manager.h \ codes/net/common-net.h \ @@ -158,7 +159,7 @@ src_libcodes_la_SOURCES = \ src/workload/methods/codes-iomock-wrkld.c \ codes/rc-stack.h \ src/util/rc-stack.c \ - src/networks/model-net/model-net.c \ + src/networks/model-net/core/model-net.c \ src/networks/model-net/common-net.c \ src/networks/model-net/simplenet-upd.c \ src/networks/model-net/torus.c \ @@ -171,10 +172,9 @@ src_libcodes_la_SOURCES = \ src/networks/model-net/fattree.c \ src/networks/model-net/loggp.c \ src/networks/model-net/simplep2p.c \ - src/networks/model-net/model-net-lp.c \ - src/networks/model-net/model-net-sched.c \ - src/networks/model-net/model-net-sched-impl.h \ - src/networks/model-net/model-net-sched-impl.c + src/networks/model-net/core/model-net-lp.c \ + src/networks/model-net/core/model-net-sched.c \ + src/networks/model-net/core/model-net-sched-impl.c src_libcodes_mpi_replay_la_SOURCES = \ src/network-workloads/model-net-mpi-replay.c @@ -197,11 +197,13 @@ bin_PROGRAMS += src/networks/model-net/topology-test bin_PROGRAMS += 
src/network-workloads/model-net-mpi-replay bin_PROGRAMS += src/network-workloads/model-net-dumpi-traces-dump bin_PROGRAMS += src/network-workloads/model-net-synthetic -bin_PROGRAMS += src/network-workloads/model-net-synthetic-custom-dfly bin_PROGRAMS += src/network-workloads/model-net-synthetic-slimfly bin_PROGRAMS += src/network-workloads/model-net-synthetic-fattree -bin_PROGRAMS += src/network-workloads/model-net-synthetic-dfly-plus -bin_PROGRAMS += src/network-workloads/model-net-synthetic-dally-dfly +bin_PROGRAMS += src/network-workloads/model-net-synthetic-dragonfly-all +bin_PROGRAMS += src/network-workloads/archived/model-net-synthetic-custom-dfly +bin_PROGRAMS += src/network-workloads/archived/model-net-synthetic-dfly-plus +bin_PROGRAMS += src/network-workloads/archived/model-net-synthetic-dally-dfly + src_workload_codes_workload_dump_SOURCES = \ @@ -214,9 +216,10 @@ src_network_workloads_model_net_mpi_replay_SOURCES = \ src/network-workloads/model-net-mpi-replay-main.c src_network_workloads_model_net_mpi_replay_CFLAGS = $(AM_CFLAGS) src_network_workloads_model_net_synthetic_SOURCES = src/network-workloads/model-net-synthetic.c -src_network_workloads_model_net_synthetic_custom_dfly_SOURCES = src/network-workloads/model-net-synthetic-custom-dfly.c -src_network_workloads_model_net_synthetic_dfly_plus_SOURCES = src/network-workloads/model-net-synthetic-dfly-plus.c -src_network_workloads_model_net_synthetic_dally_dfly_SOURCES = src/network-workloads/model-net-synthetic-dally-dfly.c +src_network_workloads_model_net_synthetic_custom_dfly_SOURCES = src/network-workloads/archived/model-net-synthetic-custom-dfly.c +src_network_workloads_model_net_synthetic_dfly_plus_SOURCES = src/network-workloads/archived/model-net-synthetic-dfly-plus.c +src_network_workloads_model_net_synthetic_dally_dfly_SOURCES = src/network-workloads/archived/model-net-synthetic-dally-dfly.c +src_network_workloads_model_net_synthetic_dragonfly_all_SOURCES = 
src/network-workloads/model-net-synthetic-dragonfly-all.c src_networks_model_net_topology_test_SOURCES = src/networks/model-net/topology-test.c #bin_PROGRAMS += src/network-workload/codes-nw-test diff --git a/src/network-workloads/model-net-synthetic-custom-dfly.c b/src/network-workloads/archived/model-net-synthetic-custom-dfly.c similarity index 98% rename from src/network-workloads/model-net-synthetic-custom-dfly.c rename to src/network-workloads/archived/model-net-synthetic-custom-dfly.c index fbb16f7b..69b4f3ca 100644 --- a/src/network-workloads/model-net-synthetic-custom-dfly.c +++ b/src/network-workloads/archived/model-net-synthetic-custom-dfly.c @@ -311,7 +311,7 @@ static void handle_kickoff_event( int rand_node_intra_id = tw_rand_integer(lp->rng, 0, num_nodes_per_grp-1); local_dest = (rand_group * num_nodes_per_grp) + rand_node_intra_id; - printf("\n LP %ld sending to %ld num nodes %d ", local_id, local_dest, num_nodes); + printf("\n LP %d sending to %llu num nodes %llu ", local_id, LLU(local_dest), num_nodes); } assert(local_dest < num_nodes); @@ -371,11 +371,6 @@ static void handle_local_event( (void)lp; ns->local_recvd_count++; } -/* convert ns to seconds */ -static tw_stime ns_to_s(tw_stime ns) -{ - return(ns / (1000.0 * 1000.0 * 1000.0)); -} /* convert seconds to ns */ static tw_stime s_to_ns(tw_stime ns) diff --git a/src/network-workloads/model-net-synthetic-dally-dfly.c b/src/network-workloads/archived/model-net-synthetic-dally-dfly.c similarity index 98% rename from src/network-workloads/model-net-synthetic-dally-dfly.c rename to src/network-workloads/archived/model-net-synthetic-dally-dfly.c index 1472b4c0..4899a266 100644 --- a/src/network-workloads/model-net-synthetic-dally-dfly.c +++ b/src/network-workloads/archived/model-net-synthetic-dally-dfly.c @@ -314,7 +314,7 @@ static void handle_kickoff_event( int rand_node_intra_id = tw_rand_integer(lp->rng, 0, num_nodes_per_grp-1); local_dest = (rand_group * num_nodes_per_grp) + rand_node_intra_id; - 
printf("\n LP %ld sending to %ld num nodes %d ", local_id, local_dest, num_nodes); + printf("\n LP %d sending to %llu num nodes %llu ", local_id, LLU(local_dest), num_nodes); } assert(local_dest < num_nodes); @@ -374,11 +374,6 @@ static void handle_local_event( (void)lp; ns->local_recvd_count++; } -/* convert ns to seconds */ -static tw_stime ns_to_s(tw_stime ns) -{ - return(ns / (1000.0 * 1000.0 * 1000.0)); -} /* convert seconds to ns */ static tw_stime s_to_ns(tw_stime ns) @@ -452,7 +447,6 @@ int main( int rank; int num_nets; int *net_ids; - int num_router_rows, num_router_cols; tw_opt_add(app_opt); tw_init(&argc, &argv); diff --git a/src/network-workloads/model-net-synthetic-dfly-plus.c b/src/network-workloads/archived/model-net-synthetic-dfly-plus.c similarity index 99% rename from src/network-workloads/model-net-synthetic-dfly-plus.c rename to src/network-workloads/archived/model-net-synthetic-dfly-plus.c index 9147fe66..209cf29d 100644 --- a/src/network-workloads/model-net-synthetic-dfly-plus.c +++ b/src/network-workloads/archived/model-net-synthetic-dfly-plus.c @@ -333,11 +333,6 @@ static void handle_local_event( (void)lp; ns->local_recvd_count++; } -/* convert ns to seconds */ -static tw_stime ns_to_s(tw_stime ns) -{ - return(ns / (1000.0 * 1000.0 * 1000.0)); -} /* convert seconds to ns */ static tw_stime s_to_ns(tw_stime ns) diff --git a/src/network-workloads/conf/dragonfly-dally/dfdally_3k.conf b/src/network-workloads/conf/dragonfly-dally/dfdally_3k.conf new file mode 100644 index 00000000..9ccf7f04 --- /dev/null +++ b/src/network-workloads/conf/dragonfly-dally/dfdally_3k.conf @@ -0,0 +1,44 @@ +LPGROUPS +{ + MODELNET_GRP + { + repetitions="342"; + nw-lp="9"; + modelnet_dragonfly_dally="9"; + modelnet_dragonfly_dally_router="1"; + } +} +PARAMS +{ + packet_size="4096"; + message_size="736"; + chunk_size="4096"; + modelnet_scheduler="fcfs"; + modelnet_order=("dragonfly_dally","dragonfly_dally_router"); + num_routers="18"; + num_router_rows="1"; + 
num_router_cols="18"; + num_groups="19"; + router_delay="90"; + local_vc_size="65536"; + global_vc_size="65536"; + cn_vc_size="65536"; + local_bandwidth="12.5"; + global_bandwidth="25.0"; + cn_bandwidth="6.25"; + num_cns_per_router="9"; + num_global_channels="9"; + intra-group-connections="../src/network-workloads/conf/dragonfly-dally/dfdally-3k-intra"; + inter-group-connections="../src/network-workloads/conf/dragonfly-dally/dfdally-3k-inter"; + routing="prog-adaptive"; + adaptive_threshold="0"; + minimal-bias="0"; + num_injection_queues="1"; + nic_seq_delay="10"; + node_copy_queues="1"; + node_eager_limit="16000"; + df-dally-vc="1"; + num_row_chans="1"; + num_col_chans="1"; + auto_credit_delay="0"; +} diff --git a/src/network-workloads/conf/dragonfly-dally/dfdally_72.conf b/src/network-workloads/conf/dragonfly-dally/dfdally_72.conf index 0c358fee..68ba6f5e 100644 --- a/src/network-workloads/conf/dragonfly-dally/dfdally_72.conf +++ b/src/network-workloads/conf/dragonfly-dally/dfdally_72.conf @@ -45,7 +45,7 @@ PARAMS # number of global channels per router num_global_channels="2"; # network config file for intra-group connections - intra-group-connections="../src/network-workloads/conf/dragonfly-dally/dfdally-72-intra" + intra-group-connections="../src/network-workloads/conf/dragonfly-dally/dfdally-72-intra"; # network config file for inter-group connections inter-group-connections="../src/network-workloads/conf/dragonfly-dally/dfdally-72-inter"; # routing protocol to be used diff --git a/src/network-workloads/conf/dragonfly-plus/dfp-test.conf b/src/network-workloads/conf/dragonfly-plus/dfp-test.conf index 0210d64b..d3f8b737 100644 --- a/src/network-workloads/conf/dragonfly-plus/dfp-test.conf +++ b/src/network-workloads/conf/dragonfly-plus/dfp-test.conf @@ -57,6 +57,6 @@ PARAMS # routing="minimal"; routing="prog-adaptive"; # route scoring protocol to be used - options are 'alpha' or 'beta' - route_scoring_metric="beta"; + route_scoring_metric="delta"; } diff --git 
a/src/network-workloads/conf/slimfly/ffly_3k.conf b/src/network-workloads/conf/slimfly/ffly_3k.conf new file mode 100644 index 00000000..ab7a3179 --- /dev/null +++ b/src/network-workloads/conf/slimfly/ffly_3k.conf @@ -0,0 +1,42 @@ +LPGROUPS +{ + MODELNET_GRP + { + repetitions="338"; + nw-lp="9"; + modelnet_slimfly="9"; + modelnet_slimfly_router="2"; + } +} +PARAMS +{ + sf_type="1"; + num_rails="2"; + rail_select="congestion"; + packet_size="4096"; + message_size="736"; + chunk_size="4096"; + modelnet_scheduler="fcfs"; + modelnet_order=("slimfly", "slimfly_router"); + num_vcs="4"; + num_routers="13"; + num_terminals="9"; + local_channels="6"; + global_channels="13"; + router_delay="90"; + link_delay="0"; + generator_set_X=("1","10","9","12","3","4"); + generator_set_X_prime=("6","8","2","7","5","11"); + local_vc_size="65536"; + global_vc_size="65536"; + cn_vc_size="65536"; + local_bandwidth="12.5"; + global_bandwidth="12.5"; + cn_bandwidth="12.5"; + routing="adaptive"; + csf_ratio="1"; + num_injection_queues="2"; + nic_seq_delay="10"; + node_copy_queues="2"; + node_eager_limit="16000"; +} diff --git a/src/network-workloads/conf/slimfly/sfly_3k.conf b/src/network-workloads/conf/slimfly/sfly_3k.conf new file mode 100644 index 00000000..5282acdb --- /dev/null +++ b/src/network-workloads/conf/slimfly/sfly_3k.conf @@ -0,0 +1,79 @@ +LPGROUPS +{ + MODELNET_GRP + { + # How many times should these LP types be replicated? + # Total number of specific LPs = x + repetitions="338"; + # Number of workload LPs per repetition. If nw-lp == number terminal LPs per repetition, this means that there will be one workload rank per terminal + nw-lp="9"; + # Number of terminal LPs per repetition. Total terminal LPs in modelnet_grp must match total terminals configured + modelnet_slimfly="9"; + # number of router LPs per repetition. 
Total router LPs in modelnet_grp must match total routers configured + modelnet_slimfly_router="1"; + } +} +PARAMS +{ +#ROSS PDES Parameters + # Order of mapping the model specific LPs + modelnet_order=("slimfly", "slimfly_router"); + # ROSS Message size - Based on size of message defined in header file (to be deprecated if possible) + message_size="736"; + +#General CODES + # Size of packets generated + packet_size="4096"; + # Size of chunks transmitted - to not chunk together packets set chunk_size == packet_size + chunk_size="4096"; + # Scheduling algorithm for packet scheduling by workload: fcfs, round-robin, priority (will require additional parameters found in model-net-lp.c) + modelnet_scheduler="fcfs"; + +#Multi-Rail Parameters - remove all for default Slim Fly behavior: + # Slim Fly (0) or Multi-Rail Slim Fly: Fit Fly (1) - to be deprecated in favor of just specifying num_rails + sf_type="0"; + # Number of rails (default 1) + num_rails="1"; + # Algorithm for selection of which rail traffic should be injected through: none (ignores additional rails), congestion (picks one with least local congestion), path (picks one with shortest path to destination), random (picks rail uniformly at random) + rail_select="none"; + # Number of injection queues for workload LPs (should equal number of rails) + num_injection_queues="1"; + # Number of sub-queues per NIC for packet (recommendation undocumented - default 1) + node_copy_queues="1"; + +#Topology Specific Parameters (mutually dependent and must be compatible with each other): + # Generator Set X for MMS-2 Slim Fly Graph + generator_set_X=("1","10","9","12","3","4"); + # Generator Set X' for MMS-2 Slim Fly Graph + generator_set_X_prime=("6","8","2","7","5","11"); + # Number of routers per Slim Fly group - Dependent on X and X' + num_routers="13"; + # Number of ports actively dedicated to local router connections per router (intra-group) + local_channels="6"; + # Number of ports actively dedicated to global router connections 
per router (inter-group) + global_channels="13"; + +#Easily Configurable Parameters (not dependent on topology, can usually change without breaking) + # Number of terminals per Slim Fly router + num_terminals="9"; + # Delay (in ns) added by router to routing packets to simulate processing from input port to output port + router_delay="90"; + # Size of local router-router VC in bytes + local_vc_size="65536"; + # Size of global router-router VC in bytes + global_vc_size="65536"; + # Size of terminal-router VC in bytes + cn_vc_size="65536"; + # Bandwidth of local router-router links in GiB/s + local_bandwidth="12.5"; + # Bandwidth of global router-router links in GiB/s + global_bandwidth="12.5"; + # Bandwidth of terminal-router links in GiB/s + cn_bandwidth="12.5"; + # Routing algorithm employed: minimal (Shortest Path), nonminimal (Valiant), adaptive (UGAL with local information) + routing="adaptive"; + # Cost multiplier for scoring of nonminimal links in comparison to minimal with adaptive routing. 
csf_ratio=2 means nonminimal link congestion is evaluated at 2x the same amount of congestion in a minimal link + csf_ratio="1"; + # Number of Virtual Channels per port - Not changeable without adapting source code routing + num_vcs="4"; +} diff --git a/src/network-workloads/model-net-dumpi-traces-dump.c b/src/network-workloads/model-net-dumpi-traces-dump.c index c1b3944c..41587018 100644 --- a/src/network-workloads/model-net-dumpi-traces-dump.c +++ b/src/network-workloads/model-net-dumpi-traces-dump.c @@ -365,7 +365,7 @@ void nw_test_finalize(nw_state* s, tw_lp* lp) total_delays += s->num_delays; total_collectives += s->num_cols; - printf("\n LP %llu total sends %ld receives %ld wait_alls %ld waits %ld ", lp->gid, s->num_sends,s->num_recvs, s->num_waitall, s->num_wait); + printf("\n LP %llu total sends %ld receives %ld wait_alls %ld waits %ld ", LLU(lp->gid), s->num_sends,s->num_recvs, s->num_waitall, s->num_wait); avg_time += s->total_time; avg_compute_time += s->compute_time; avg_comm_time += (s->total_time - s->compute_time); diff --git a/src/network-workloads/model-net-mpi-replay.c b/src/network-workloads/model-net-mpi-replay.c index 7252f69e..9f6488d9 100644 --- a/src/network-workloads/model-net-mpi-replay.c +++ b/src/network-workloads/model-net-mpi-replay.c @@ -62,8 +62,7 @@ static int priority_type = 0; static int num_dumpi_traces = 0; static int64_t EAGER_THRESHOLD = 8192; -static long num_ops = 0; -static int upper_threshold = 1048576; +// static int upper_threshold = 1048576; static int alloc_spec = 0; static tw_stime self_overhead = 10.0; static tw_stime mean_interval = 100000; @@ -633,7 +632,7 @@ void finish_bckgnd_traffic( (void)b; (void)msg; ns->is_finished = 1; - lprintf("\n LP %llu completed sending data %lu completed at time %lf ", LLU(lp->gid), ns->gen_data, tw_now(lp)); + lprintf("\n LP %llu completed sending data %llu completed at time %lf ", LLU(lp->gid), ns->gen_data, tw_now(lp)); return; } @@ -894,7 +893,7 @@ void arrive_syn_tr(nw_state * s, 
tw_bf * bf, nw_message * m, tw_lp * lp) if(PRINT_SYNTH_TRAFFIC) { if(s->local_rank == 0) { - printf("\n Data arrived %lld rank %llu total data %ld ", m->fwd.num_bytes, s->nw_id, s->syn_data); + printf("\n Data arrived %llu rank %llu total data %llu ", LLU(m->fwd.num_bytes), LLU(s->nw_id), s->syn_data); /* if(s->syn_data > upper_threshold) if(s->local_rank == 0) { @@ -2509,7 +2508,7 @@ static void get_next_mpi_operation(nw_state* s, tw_bf * bf, nw_message * m, tw_l /* Notify ranks from other job that checkpoint traffic has * completed */ - printf("\n Network node %d Rank %d finished at %lf ", s->local_rank, s->nw_id, tw_now(lp)); + printf("\n Network node %d Rank %llu finished at %lf ", s->local_rank, LLU(s->nw_id), tw_now(lp)); int num_jobs = codes_jobmap_get_num_jobs(jobmap_ctx); if(num_jobs <= 1 || is_synthetic == 0) { @@ -2678,15 +2677,15 @@ void nw_test_finalize(nw_state* s, tw_lp* lp) if(count_irecv > 0 || count_isend > 0) { unmatched = 1; - printf("\n nw-id %lld unmatched irecvs %d unmatched sends %d Total sends %ld receives %ld collectives %ld delays %ld wait alls %ld waits %ld send time %lf wait %lf", - s->nw_id, count_irecv, count_isend, s->num_sends, s->num_recvs, s->num_cols, s->num_delays, s->num_waitall, s->num_wait, s->send_time, s->wait_time); + printf("\n nw-id %llu unmatched irecvs %d unmatched sends %d Total sends %ld receives %ld collectives %ld delays %ld wait alls %ld waits %ld send time %lf wait %lf", + LLU(s->nw_id) , count_irecv, count_isend, s->num_sends, s->num_recvs, s->num_cols, s->num_delays, s->num_waitall, s->num_wait, s->send_time, s->wait_time); } written = 0; if(!s->nw_id) written = sprintf(s->output_buf, "# Format "); - written += sprintf(s->output_buf + written, "\n %llu %llu %d %d %ld %ld %ld %ld %lf %lf %lf %lf %lf", LLU(lp->gid), LLU(s->nw_id), s->app_id, s->local_rank, s->num_sends, s->num_recvs, s->num_bytes_sent, + written += sprintf(s->output_buf + written, "\n %llu %llu %d %d %ld %ld %llu %llu %lf %lf %lf %lf %lf", 
LLU(lp->gid), LLU(s->nw_id), s->app_id, s->local_rank, s->num_sends, s->num_recvs, s->num_bytes_sent, s->num_bytes_recvd, s->send_time, s->elapsed_time - s->compute_time, s->compute_time, avg_msg_time, s->max_time); lp_io_write(lp->gid, (char*)"mpi-replay-stats", written, s->output_buf); @@ -2882,6 +2881,8 @@ void nw_lp_model_stat_collect(nw_state *s, tw_lp *lp, char *buffer) void ross_nw_lp_sample_fn(nw_state * s, tw_bf * bf, tw_lp * lp, struct ross_model_sample *sample) { + (void)bf; + (void)lp; memcpy(sample, &s->ross_sample, sizeof(s->ross_sample)); sample->nw_id = s->nw_id; sample->app_id = s->app_id; @@ -2900,6 +2901,8 @@ void ross_nw_lp_sample_fn(nw_state * s, tw_bf * bf, tw_lp * lp, struct ross_mode void ross_nw_lp_sample_rc_fn(nw_state * s, tw_bf * bf, tw_lp * lp, struct ross_model_sample *sample) { + (void)bf; + (void)lp; memcpy(&s->ross_sample, sample, sizeof(*sample)); } diff --git a/src/network-workloads/model-net-synthetic-dragonfly-all.c b/src/network-workloads/model-net-synthetic-dragonfly-all.c new file mode 100644 index 00000000..a0e584af --- /dev/null +++ b/src/network-workloads/model-net-synthetic-dragonfly-all.c @@ -0,0 +1,569 @@ +/* + * Copyright (C) 2019 Neil McGlohon + * See LICENSE notice in top-level directory + */ + +#include "codes/model-net.h" +#include "codes/lp-io.h" +#include "codes/codes.h" +#include "codes/codes_mapping.h" +#include "codes/configuration.h" +#include "codes/lp-type-lookup.h" + + +static int net_id = 0; +static int traffic = 1; +static double arrival_time = 1000.0; +static int PAYLOAD_SZ = 2048; + +static int num_servers_per_rep = 0; +static int num_routers_per_grp = 0; +static int num_nodes_per_grp = 0; +static int num_nodes_per_router = 0; +static int num_groups = 0; +static unsigned long long num_nodes = 0; + +//Dragonfly Custom Specific values +int num_router_rows, num_router_cols; + +//Dragonfly Plus Specific Values +int num_router_leaf, num_router_spine; + +//Dragonfly Dally Specific Values +int num_routers; 
//also used by original Dragonfly + +static char lp_io_dir[256] = {'\0'}; +static lp_io_handle io_handle; +static unsigned int lp_io_use_suffix = 0; +static int do_lp_io = 0; +static int num_msgs = 20; +static tw_stime sampling_interval = 800000; +static tw_stime sampling_end_time = 1600000; + +typedef struct svr_msg svr_msg; +typedef struct svr_state svr_state; + +/* global variables for codes mapping */ +static char group_name[MAX_NAME_LENGTH]; +static char lp_type_name[MAX_NAME_LENGTH]; +static int group_index, lp_type_index, rep_id, offset; + +/* type of events */ +enum svr_event +{ + KICKOFF, /* kickoff event */ + REMOTE, /* remote event */ + LOCAL /* local event */ +}; + +/* type of synthetic traffic */ +enum TRAFFIC +{ + UNIFORM = 1, /* sends message to a randomly selected node */ + RAND_PERM = 2, + NEAREST_GROUP = 3, /* sends message to the node connected to the neighboring router */ + NEAREST_NEIGHBOR = 4, /* sends message to the next node (potentially connected to the same router) */ + RANDOM_OTHER_GROUP = 5 + +}; + +struct svr_state +{ + int msg_sent_count; /* requests sent */ + int msg_recvd_count; /* requests recvd */ + int local_recvd_count; /* number of local messages received */ + tw_stime start_ts; /* time that we started sending requests */ + tw_stime end_ts; /* time that we ended sending requests */ + int svr_id; + int dest_id; +}; + +struct svr_msg +{ + enum svr_event svr_event_type; + tw_lpid src; /* source of this request or ack */ + int completed_sends; /* helper for reverse computation */ + model_net_event_return event_rc; +}; + +static void svr_init( + svr_state * ns, + tw_lp * lp); +static void svr_event( + svr_state * ns, + tw_bf * b, + svr_msg * m, + tw_lp * lp); +static void svr_rev_event( + svr_state * ns, + tw_bf * b, + svr_msg * m, + tw_lp * lp); +static void svr_finalize( + svr_state * ns, + tw_lp * lp); + +tw_lptype svr_lp = { + (init_f) svr_init, + (pre_run_f) NULL, + (event_f) svr_event, + (revent_f) svr_rev_event, + (commit_f) 
NULL, + (final_f) svr_finalize, + (map_f) codes_mapping, + sizeof(svr_state), +}; + +void dragonfly_svr_event_collect(svr_msg *m, tw_lp *lp, char *buffer, int *collect_flag) +{ + (void)lp; + (void)collect_flag; + int type = (int) m->svr_event_type; + memcpy(buffer, &type, sizeof(type)); +} + +/* can add in any model level data to be collected along with simulation engine data + * in the ROSS instrumentation. Will need to update the last field in + * svr_model_types[0] for the size of the data to save in each function call + */ +void dragonfly_svr_model_stat_collect(svr_state *s, tw_lp *lp, char *buffer) +{ + (void)s; + (void)lp; + (void)buffer; + return; +} + +st_model_types dragonfly_svr_model_types[] = { + {(ev_trace_f) dragonfly_svr_event_collect, + sizeof(int), + (model_stat_f) dragonfly_svr_model_stat_collect, + 0, + NULL, + NULL, + 0}, + {NULL, 0, NULL, 0, NULL, NULL, 0} +}; + +static const st_model_types *dragonfly_svr_get_model_stat_types(void) +{ + return(&dragonfly_svr_model_types[0]); +} + +void dragonfly_svr_register_model_types() +{ + st_model_type_register("nw-lp", dragonfly_svr_get_model_stat_types()); +} + +const tw_optdef app_opt [] = +{ + TWOPT_GROUP("Model net synthetic traffic " ), + TWOPT_UINT("traffic", traffic, "UNIFORM RANDOM=1, NEAREST NEIGHBOR=2 "), + TWOPT_UINT("num_messages", num_msgs, "Number of messages to be generated per terminal "), + TWOPT_UINT("payload_sz",PAYLOAD_SZ, "size of the message being sent "), + TWOPT_STIME("sampling-interval", sampling_interval, "the sampling interval "), + TWOPT_STIME("sampling-end-time", sampling_end_time, "sampling end time "), + TWOPT_STIME("arrival_time", arrival_time, "INTER-ARRIVAL TIME"), + TWOPT_CHAR("lp-io-dir", lp_io_dir, "Where to place io output (unspecified -> no output"), + TWOPT_UINT("lp-io-use-suffix", lp_io_use_suffix, "Whether to append uniq suffix to lp-io directory (default 0)"), + TWOPT_END() +}; + +const tw_lptype* svr_get_lp_type() +{ + return(&svr_lp); +} + +static void 
svr_add_lp_type() +{ + lp_type_register("nw-lp", svr_get_lp_type()); +} + +static void issue_event( + svr_state * ns, + tw_lp * lp) +{ + (void)ns; + tw_event *e; + svr_msg *m; + tw_stime kickoff_time; + + /* each server sends a dummy event to itself that will kick off the real + * simulation + */ + + /* skew each kickoff event slightly to help avoid event ties later on */ + kickoff_time = 1.1 * g_tw_lookahead + tw_rand_exponential(lp->rng, arrival_time); + + e = tw_event_new(lp->gid, kickoff_time, lp); + m = tw_event_data(e); + m->svr_event_type = KICKOFF; + tw_event_send(e); +} + +static void svr_init( + svr_state * ns, + tw_lp * lp) +{ + ns->start_ts = 0.0; + ns->dest_id = -1; + ns->svr_id = codes_mapping_get_lp_relative_id(lp->gid, 0, 0); + + issue_event(ns, lp); + return; +} + +static void handle_kickoff_rev_event( + svr_state * ns, + tw_bf * b, + svr_msg * m, + tw_lp * lp) +{ + if(m->completed_sends) + return; + + if(b->c1) + tw_rand_reverse_unif(lp->rng); + + if(b->c8) + tw_rand_reverse_unif(lp->rng); + if(traffic == RANDOM_OTHER_GROUP) { + tw_rand_reverse_unif(lp->rng); + tw_rand_reverse_unif(lp->rng); + } + + model_net_event_rc2(lp, &m->event_rc); + ns->msg_sent_count--; + tw_rand_reverse_unif(lp->rng); +} +static void handle_kickoff_event( + svr_state * ns, + tw_bf * b, + svr_msg * m, + tw_lp * lp) +{ + if(ns->msg_sent_count >= num_msgs) + { + m->completed_sends = 1; + return; + } + + m->completed_sends = 0; + + char anno[MAX_NAME_LENGTH]; + tw_lpid local_dest = -1, global_dest = -1; + + svr_msg * m_local = malloc(sizeof(svr_msg)); + svr_msg * m_remote = malloc(sizeof(svr_msg)); + + m_local->svr_event_type = LOCAL; + m_local->src = lp->gid; + + memcpy(m_remote, m_local, sizeof(svr_msg)); + m_remote->svr_event_type = REMOTE; + + ns->start_ts = tw_now(lp); + codes_mapping_get_lp_info(lp->gid, group_name, &group_index, lp_type_name, &lp_type_index, anno, &rep_id, &offset); + int local_id = codes_mapping_get_lp_relative_id(lp->gid, 0, 0); + + /* in case of 
uniform random traffic, send to a random destination. */ + if(traffic == UNIFORM) + { + b->c1 = 1; + local_dest = tw_rand_integer(lp->rng, 1, num_nodes - 2); + local_dest = (ns->svr_id + local_dest) % num_nodes; + } + else if(traffic == NEAREST_GROUP) + { + local_dest = (local_id + num_nodes_per_grp) % num_nodes; + //printf("\n LP %ld sending to %ld num nodes %d ", local_id, local_dest, num_nodes); + } + else if(traffic == NEAREST_NEIGHBOR) + { + local_dest = (local_id + 1) % num_nodes; +// printf("\n LP %ld sending to %ld num nodes %d ", rep_id * 2 + offset, local_dest, num_nodes); + } + else if(traffic == RAND_PERM) + { + if(ns->dest_id == -1) + { + b->c8 = 1; + ns->dest_id = tw_rand_integer(lp->rng, 0, num_nodes - 1); + local_dest = ns->dest_id; + } + else + { + local_dest = ns->dest_id; + } + } + else if(traffic == RANDOM_OTHER_GROUP) + { + int my_group_id = local_id / num_nodes_per_grp; + + int other_groups[num_groups-1]; + int added =0; + for(int i = 0; i < num_groups; i++) + { + if(i != my_group_id) { + other_groups[added] = i; + added++; + } + } + int rand_group = other_groups[tw_rand_integer(lp->rng,0,added -1)]; + int rand_node_intra_id = tw_rand_integer(lp->rng, 0, num_nodes_per_grp-1); + + local_dest = (rand_group * num_nodes_per_grp) + rand_node_intra_id; + printf("\n LP %d sending to %llu num nodes %llu ", local_id, LLU(local_dest), num_nodes); + + } + assert(local_dest < num_nodes); +// codes_mapping_get_lp_id(group_name, lp_type_name, anno, 1, local_dest / num_servers_per_rep, local_dest % num_servers_per_rep, &global_dest); + global_dest = codes_mapping_get_lpid_from_relative(local_dest, group_name, lp_type_name, NULL, 0); + ns->msg_sent_count++; + m->event_rc = model_net_event(net_id, "test", global_dest, PAYLOAD_SZ, 0.0, sizeof(svr_msg), (const void*)m_remote, sizeof(svr_msg), (const void*)m_local, lp); + + issue_event(ns, lp); + return; +} + +static void handle_remote_rev_event( + svr_state * ns, + tw_bf * b, + svr_msg * m, + tw_lp * lp) +{ + 
(void)b; + (void)m; + (void)lp; + ns->msg_recvd_count--; +} + +static void handle_remote_event( + svr_state * ns, + tw_bf * b, + svr_msg * m, + tw_lp * lp) +{ + (void)b; + (void)m; + (void)lp; + ns->msg_recvd_count++; +} + +static void handle_local_rev_event( + svr_state * ns, + tw_bf * b, + svr_msg * m, + tw_lp * lp) +{ + (void)b; + (void)m; + (void)lp; + ns->local_recvd_count--; +} + +static void handle_local_event( + svr_state * ns, + tw_bf * b, + svr_msg * m, + tw_lp * lp) +{ + (void)b; + (void)m; + (void)lp; + ns->local_recvd_count++; +} + +/* convert seconds to ns */ +static tw_stime s_to_ns(tw_stime ns) +{ + return(ns * (1000.0 * 1000.0 * 1000.0)); +} + +static void svr_finalize( + svr_state * ns, + tw_lp * lp) +{ + ns->end_ts = tw_now(lp); + + //printf("server %llu recvd %d bytes in %f seconds, %f MiB/s sent_count %d recvd_count %d local_count %d \n", (unsigned long long)lp->gid, PAYLOAD_SZ*ns->msg_recvd_count, ns_to_s(ns->end_ts-ns->start_ts), + // ((double)(PAYLOAD_SZ*ns->msg_sent_count)/(double)(1024*1024)/ns_to_s(ns->end_ts-ns->start_ts)), ns->msg_sent_count, ns->msg_recvd_count, ns->local_recvd_count); + return; +} + +static void svr_rev_event( + svr_state * ns, + tw_bf * b, + svr_msg * m, + tw_lp * lp) +{ + switch (m->svr_event_type) + { + case REMOTE: + handle_remote_rev_event(ns, b, m, lp); + break; + case LOCAL: + handle_local_rev_event(ns, b, m, lp); + break; + case KICKOFF: + handle_kickoff_rev_event(ns, b, m, lp); + break; + default: + assert(0); + break; + } +} + +static void svr_event( + svr_state * ns, + tw_bf * b, + svr_msg * m, + tw_lp * lp) +{ + switch (m->svr_event_type) + { + case REMOTE: + handle_remote_event(ns, b, m, lp); + break; + case LOCAL: + handle_local_event(ns, b, m, lp); + break; + case KICKOFF: + handle_kickoff_event(ns, b, m, lp); + break; + default: + printf("\n Invalid message type %d ", m->svr_event_type); + assert(0); + break; + } +} + +int main( + int argc, + char **argv) +{ + int nprocs; + int rank; + int num_nets; + 
int *net_ids; + + tw_opt_add(app_opt); + tw_init(&argc, &argv); + +#ifdef USE_RDAMARIS + if(g_st_ross_rank) + { // keep damaris ranks from running code between here up until tw_end() +#endif + codes_comm_update(); + + + if(argc < 2) + { + printf("\n Usage: mpirun --sync=1/2/3 -- "); + MPI_Finalize(); + return 0; + } + + MPI_Comm_rank(MPI_COMM_CODES, &rank); + MPI_Comm_size(MPI_COMM_CODES, &nprocs); + + configuration_load(argv[2], MPI_COMM_CODES, &config); + + model_net_register(); + svr_add_lp_type(); + + if (g_st_ev_trace || g_st_model_stats || g_st_use_analysis_lps) + dragonfly_svr_register_model_types(); + + codes_mapping_setup(); + + net_ids = model_net_configure(&num_nets); + //assert(num_nets==1); + net_id = *net_ids; + free(net_ids); + + /* 5 days of simulation time */ + g_tw_ts_end = s_to_ns(5 * 24 * 60 * 60); + model_net_enable_sampling(sampling_interval, sampling_end_time); + + if(!(net_id == DRAGONFLY_DALLY || net_id == DRAGONFLY_PLUS || net_id == DRAGONFLY_CUSTOM || net_id == DRAGONFLY)) + { + printf("\n The workload generator is designed to only work with Dragonfly based model (Dally, Plus, Custom, Original) configuration only! 
%d %d ", DRAGONFLY_DALLY, net_id); + MPI_Finalize(); + return 0; + } + num_servers_per_rep = codes_mapping_get_lp_count("MODELNET_GRP", 1, "nw-lp", + NULL, 1); + + int num_routers_with_cns_per_group; + + if (net_id == DRAGONFLY_DALLY) { + if (!rank) + printf("Synthetic Generator: Detected Dragonfly Dally\n"); + configuration_get_value_int(&config, "PARAMS", "num_routers", NULL, &num_routers); + configuration_get_value_int(&config, "PARAMS", "num_groups", NULL, &num_groups); + configuration_get_value_int(&config, "PARAMS", "num_cns_per_router", NULL, &num_nodes_per_router); + num_routers_with_cns_per_group = num_routers; + } + else if (net_id == DRAGONFLY_PLUS) { + if (!rank) + printf("Synthetic Generator: Detected Dragonfly Plus\n"); + configuration_get_value_int(&config, "PARAMS", "num_router_leaf", NULL, &num_router_leaf); + configuration_get_value_int(&config, "PARAMS", "num_router_spine", NULL, &num_router_spine); + configuration_get_value_int(&config, "PARAMS", "num_routers", NULL, &num_routers); + configuration_get_value_int(&config, "PARAMS", "num_groups", NULL, &num_groups); + configuration_get_value_int(&config, "PARAMS", "num_cns_per_router", NULL, &num_nodes_per_router); + num_routers_with_cns_per_group = num_router_leaf; + + } + else if (net_id == DRAGONFLY_CUSTOM) { + if (!rank) + printf("Synthetic Generator: Detected Dragonfly Custom\n"); + configuration_get_value_int(&config, "PARAMS", "num_router_rows", NULL, &num_router_rows); + configuration_get_value_int(&config, "PARAMS", "num_router_cols", NULL, &num_router_cols); + configuration_get_value_int(&config, "PARAMS", "num_groups", NULL, &num_groups); + configuration_get_value_int(&config, "PARAMS", "num_cns_per_router", NULL, &num_nodes_per_router); + num_routers_with_cns_per_group = num_router_rows * num_router_cols; + } + else if (net_id == DRAGONFLY) { + if (!rank) + printf("Synthetic Generator: Detected Dragonfly Original 1D\n"); + configuration_get_value_int(&config, "PARAMS", "num_routers", 
NULL, &num_routers); + num_nodes_per_router = num_routers/2; + num_routers_with_cns_per_group = num_routers; + num_groups = num_routers * num_nodes_per_router + 1; + } + + num_nodes = num_groups * num_routers_with_cns_per_group * num_nodes_per_router; + num_nodes_per_grp = num_routers_with_cns_per_group * num_nodes_per_router; + + assert(num_nodes); + + if(lp_io_dir[0]) + { + do_lp_io = 1; + int flags = lp_io_use_suffix ? LP_IO_UNIQ_SUFFIX : 0; + int ret = lp_io_prepare(lp_io_dir, flags, &io_handle, MPI_COMM_CODES); + assert(ret == 0 || !"lp_io_prepare failure"); + } + tw_run(); + if (do_lp_io){ + int ret = lp_io_flush(io_handle, MPI_COMM_CODES); + assert(ret == 0 || !"lp_io_flush failure"); + } + model_net_report_stats(net_id); +#ifdef USE_RDAMARIS + } // end if(g_st_ross_rank) +#endif + tw_end(); + return 0; +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ft=c ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/network-workloads/model-net-synthetic-slimfly.c b/src/network-workloads/model-net-synthetic-slimfly.c index 3c21cbc6..167b89c7 100644 --- a/src/network-workloads/model-net-synthetic-slimfly.c +++ b/src/network-workloads/model-net-synthetic-slimfly.c @@ -58,7 +58,7 @@ static char lp_type_name[MAX_NAME_LENGTH]; static int group_index, lp_type_index, rep_id, offset; /* 2D rank communication heat map */ -static int **comm_map; +// static int **comm_map; //NM: this was implemented in a way that wouldn't work in parallel execution /* Function for computing local and global connections for a given router id */ static void get_router_connections(int src_router_id, int* local_channels, int* global_channels); diff --git a/src/networks/model-net/model-net-lp.c b/src/networks/model-net/core/model-net-lp.c similarity index 99% rename from src/networks/model-net/model-net-lp.c rename to src/networks/model-net/core/model-net-lp.c index a02f404b..6b22bdd5 100644 --- a/src/networks/model-net/model-net-lp.c +++ 
b/src/networks/model-net/core/model-net-lp.c @@ -556,7 +556,7 @@ void model_net_base_event( tw_lp * lp){ if(m->h.magic != model_net_base_magic) - printf("\n LP ID mismatched %llu\n", lp->gid); + printf("\n LP ID mismatched %llu\n", LLU(lp->gid)); assert(m->h.magic == model_net_base_magic); diff --git a/src/networks/model-net/model-net-sched-impl.c b/src/networks/model-net/core/model-net-sched-impl.c similarity index 99% rename from src/networks/model-net/model-net-sched-impl.c rename to src/networks/model-net/core/model-net-sched-impl.c index 68b616ec..7bcf4d16 100644 --- a/src/networks/model-net/model-net-sched-impl.c +++ b/src/networks/model-net/core/model-net-sched-impl.c @@ -7,7 +7,7 @@ #include #include -#include "model-net-sched-impl.h" +#include #include #include #include diff --git a/src/networks/model-net/model-net-sched.c b/src/networks/model-net/core/model-net-sched.c similarity index 98% rename from src/networks/model-net/model-net-sched.c rename to src/networks/model-net/core/model-net-sched.c index 7c3413dc..ca31659a 100644 --- a/src/networks/model-net/model-net-sched.c +++ b/src/networks/model-net/core/model-net-sched.c @@ -11,7 +11,7 @@ #include "codes/model-net-sched.h" #include "codes/model-net-lp.h" -#include "model-net-sched-impl.h" +#include "codes/model-net-sched-impl.h" #include "codes/quicklist.h" #define X(a,b,c) b, diff --git a/src/networks/model-net/model-net.c b/src/networks/model-net/core/model-net.c similarity index 100% rename from src/networks/model-net/model-net.c rename to src/networks/model-net/core/model-net.c diff --git a/src/networks/model-net/dragonfly-custom.C b/src/networks/model-net/dragonfly-custom.C index 49c2c513..a2deb05b 100644 --- a/src/networks/model-net/dragonfly-custom.C +++ b/src/networks/model-net/dragonfly-custom.C @@ -26,7 +26,6 @@ #endif #define DUMP_CONNECTIONS 0 -#define CREDIT_SIZE 8 #define DFLY_HASH_TABLE_SIZE 4999 // debugging parameters #define DEBUG_LP 892 @@ -175,7 +174,10 @@ struct dragonfly_param 
double cn_delay; double local_delay; double global_delay; - double credit_delay; + int credit_size; + double local_credit_delay; + double global_credit_delay; + double cn_credit_delay; double router_delay; }; @@ -312,7 +314,6 @@ enum last_hop GLOBAL=1, LOCAL, TERMINAL, - ROOT }; /* three forms of routing algorithms available, adaptive routing is not @@ -852,10 +853,98 @@ else p->num_routers, p->radix); } - p->cn_delay = bytes_to_ns(p->chunk_size, p->cn_bandwidth); - p->local_delay = bytes_to_ns(p->chunk_size, p->local_bandwidth); - p->global_delay = bytes_to_ns(p->chunk_size, p->global_bandwidth); - p->credit_delay = bytes_to_ns(CREDIT_SIZE, p->local_bandwidth); //assume 8 bytes packet + rc = configuration_get_value_double(&config, "PARAMS", "cn_delay", anno, &p->cn_delay); + if (rc) { + p->cn_delay = bytes_to_ns(p->chunk_size, p->cn_bandwidth); + if(!myRank) + fprintf(stderr, "cn_delay not specified, using default calculation: %.2f\n", p->cn_delay); + } + + rc = configuration_get_value_double(&config, "PARAMS", "local_delay", anno, &p->local_delay); + if (rc) { + p->local_delay = bytes_to_ns(p->chunk_size, p->local_bandwidth); + if(!myRank) + fprintf(stderr, "local_delay not specified, using default calculation: %.2f\n", p->local_delay); + } + rc = configuration_get_value_double(&config, "PARAMS", "global_delay", anno, &p->global_delay); + if (rc) { + p->global_delay = bytes_to_ns(p->chunk_size, p->global_bandwidth); + if(!myRank) + fprintf(stderr, "global_delay not specified, using default calculation: %.2f\n", p->global_delay); + } + + //CREDIT DELAY CONFIGURATION LOGIC ------------ + rc = configuration_get_value_int(&config, "PARAMS", "credit_size", anno, &p->credit_size); + if (rc) { + p->credit_size = 8; + if(!myRank) + fprintf(stderr, "credit_size not specified, using default: %d\n", p->credit_size); + } + + double general_credit_delay; + int credit_delay_unset = configuration_get_value_double(&config, "PARAMS", "credit_delay", anno, &general_credit_delay); 
+ int local_credit_delay_unset = configuration_get_value_double(&config, "PARAMS", "local_credit_delay", anno, &p->local_credit_delay); + int global_credit_delay_unset = configuration_get_value_double(&config, "PARAMS", "global_credit_delay", anno, &p->global_credit_delay); + int cn_credit_delay_unset = configuration_get_value_double(&config, "PARAMS", "cn_credit_delay", anno, &p->cn_credit_delay); + + int auto_credit_delay_flag; + rc = configuration_get_value_int(&config, "PARAMS", "auto_credit_delay", anno, &auto_credit_delay_flag); + if (rc) { + auto_credit_delay_flag = 0; + } + else { + if(!myRank && auto_credit_delay_flag) + fprintf(stderr, "auto_credit_delay flag enabled. All credit delays will be calculated based on their respective bandwidths\n"); + } + + //If the user specifies a general "credit_delay" AND any of the more specific credit delays, throw an error to make sure they correct their configuration + if (!credit_delay_unset && !(local_credit_delay_unset || global_credit_delay_unset || cn_credit_delay_unset)) + tw_error(TW_LOC, "\nCannot set both a general credit delay and specific (local/global/cn) credit delays. Check configuration file."); + + //If the user specifies ANY credit delays general or otherwise AND has the auto credit delay flag enabled, throw an error to make sure they correct the conflicting configuration + if ((!credit_delay_unset || !local_credit_delay_unset || !global_credit_delay_unset || !cn_credit_delay_unset) && auto_credit_delay_flag) + tw_error(TW_LOC, "\nCannot set both a credit delay (general or specific) and also enable auto credit delay calculation. Check Configuration file."); + + //If the user doesn't specify either general or specific credit delays - calculate credit delay based on local bandwidth. 
+ //This is old legacy behavior that is left in to make sure that the credit delay configurations of old aren't semantically different + //Other possible way to program this would be to make each credit delay be set based on their respective bandwidths but this semantically + //changes the behavior of old configuration files. (although it would be more accurate) + if (credit_delay_unset && local_credit_delay_unset && global_credit_delay_unset && cn_credit_delay_unset && !auto_credit_delay_flag) { + p->local_credit_delay = bytes_to_ns(p->credit_size, p->local_bandwidth); + p->global_credit_delay = p->local_credit_delay; + p->cn_credit_delay = p->local_credit_delay; + if(!myRank) + fprintf(stderr, "no credit_delay specified - all credit delays set to %.2f\n",p->local_credit_delay); + } + //If the user doesn't specify a general credit delay but leaves any of the specific credit delay values unset, then we need to set those (the above conditional handles if none of them had been set) + else if (credit_delay_unset) { + if (local_credit_delay_unset) { + p->local_credit_delay = bytes_to_ns(p->credit_size, p->local_bandwidth); + if(!myRank && !auto_credit_delay_flag) //if the auto credit delay flag is true then we've already printed what we're going to do + fprintf(stderr, "local_credit_delay not specified, using calculation based on local bandwidth: %.2f\n", p->local_credit_delay); + } + if (global_credit_delay_unset) { + p->global_credit_delay = bytes_to_ns(p->credit_size, p->global_bandwidth); + if(!myRank && !auto_credit_delay_flag) //if the auto credit delay flag is true then we've already printed what we're going to do + fprintf(stderr, "global_credit_delay not specified, using calculation based on global bandwidth: %.2f\n", p->global_credit_delay); + } + if (cn_credit_delay_unset) { + p->cn_credit_delay = bytes_to_ns(p->credit_size, p->cn_bandwidth); + if(!myRank && !auto_credit_delay_flag) //if the auto credit delay flag is true then we've already printed what 
we're going to do + fprintf(stderr, "cn_credit_delay not specified, using calculation based on cn bandwidth: %.2f\n", p->cn_credit_delay); + } + } + //If the user specifies a general credit delay (but didn't specify any specific credit delays) then we set all specific credit delays to the general + else if (!credit_delay_unset) { + p->local_credit_delay = general_credit_delay; + p->global_credit_delay = general_credit_delay; + p->cn_credit_delay = general_credit_delay; + + if(!myRank) + fprintf(stderr, "general credit_delay specified - all credit delays set to %.2f\n",general_credit_delay); + } + //END CREDIT DELAY CONFIGURATION LOGIC ---------------- + } void dragonfly_custom_configure(){ @@ -1184,6 +1273,7 @@ static void router_credit_send(router_state * s, terminal_custom_message * msg, int dest = 0, type = R_BUFFER; int is_terminal = 0; + double credit_delay; const dragonfly_param *p = s->params; @@ -1192,15 +1282,20 @@ static void router_credit_send(router_state * s, terminal_custom_message * msg, dest = msg->src_terminal_id; type = T_BUFFER; is_terminal = 1; - } else if(msg->last_hop == GLOBAL - || msg->last_hop == LOCAL - || msg->last_hop == ROOT) - { + credit_delay = p->cn_credit_delay; + } + else if(msg->last_hop == GLOBAL) { dest = msg->intm_lp_id; - } else + credit_delay = p->global_credit_delay; + } + else if(msg->last_hop == LOCAL) { + dest = msg->intm_lp_id; + credit_delay = p->local_credit_delay; + } + else printf("\n Invalid message type"); - ts = g_tw_lookahead + p->credit_delay + tw_rand_unif(lp->rng); + ts = g_tw_lookahead + credit_delay + tw_rand_unif(lp->rng); if (is_terminal) { buf_e = model_net_method_event_new(dest, ts, lp, DRAGONFLY_CUSTOM, @@ -1336,7 +1431,7 @@ static void packet_generate(terminal_state * s, tw_bf * bf, terminal_custom_mess s->terminal_length += s->params->chunk_size; } - if(s->terminal_length < 2 * s->params->cn_vc_size) { + if(s->terminal_length < 2 * s->params->cn_vc_size) { //TODO This hardcoded 2 * 
s->params->cn_vc_size seems dubious model_net_method_idle_event(nic_ts, 0, lp); } else { bf->c11 = 1; @@ -1491,9 +1586,9 @@ static void packet_send(terminal_state * s, tw_bf * bf, terminal_custom_message if(cur_entry->msg.packet_ID == LLU(TRACK_PKT) && lp->gid == T_ID) - printf("\n Packet %llu generated at terminal %d dest %llu size %llu num chunks %llu router-id %d %d", + printf("\n Packet %llu generated at terminal %d dest %llu size %llu num chunks %llu router-id %d %llu", cur_entry->msg.packet_ID, s->terminal_id, LLU(cur_entry->msg.dest_terminal_id), - LLU(cur_entry->msg.packet_size), LLU(num_chunks), s->router_id, router_id); + LLU(cur_entry->msg.packet_size), LLU(num_chunks), s->router_id, LLU(router_id)); if(cur_entry->msg.chunk_id == num_chunks - 1 && (cur_entry->msg.local_event_size_bytes > 0)) { @@ -1720,9 +1815,9 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_custom_messag assert(lp->gid == msg->dest_terminal_id); if(msg->packet_ID == LLU(TRACK_PKT) && msg->src_terminal_id == T_ID) - printf("\n Packet %d arrived at lp %llu hops %d ", msg->sender_lp, LLU(lp->gid), msg->my_N_hop); + printf("\n Packet %llu arrived at lp %llu hops %d ", LLU(msg->sender_lp), LLU(lp->gid), msg->my_N_hop); - tw_stime ts = g_tw_lookahead + s->params->credit_delay + tw_rand_unif(lp->rng); + tw_stime ts = g_tw_lookahead + s->params->cn_credit_delay + tw_rand_unif(lp->rng); // no method_event here - message going to router tw_event * buf_e; @@ -2509,7 +2604,7 @@ get_next_stop(router_state * s, next_stop[select_chan] % num_routers_per_mgrp, &router_dest_id); if(msg->packet_ID == LLU(TRACK_PKT) && msg->src_terminal_id == T_ID) - printf("\n Next stop is %ld ", next_stop[select_chan]); + printf("\n Next stop is %d ", next_stop[select_chan]); return router_dest_id; } @@ -2556,7 +2651,7 @@ get_next_stop(router_state * s, dest_lp = dests[select_chan]; } if(msg->packet_ID == LLU(TRACK_PKT) && msg->src_terminal_id == T_ID) - printf("\n Next stop is %ld ", dest_lp); + 
printf("\n Next stop is %d ", dest_lp); codes_mapping_get_lp_id(lp_group_name, LP_CONFIG_NM_ROUT, s->anno, 0, dest_lp / num_routers_per_mgrp, dest_lp % num_routers_per_mgrp, &router_dest_id); @@ -3347,7 +3442,7 @@ router_packet_send( router_state * s, double bytetime = delay; if(cur_entry->msg.packet_size == 0) - bytetime = bytes_to_ns(CREDIT_SIZE, bandwidth); + bytetime = bytes_to_ns(s->params->credit_size, bandwidth); if((cur_entry->msg.packet_size < s->params->chunk_size) && (cur_entry->msg.chunk_id == num_chunks - 1)) bytetime = bytes_to_ns(cur_entry->msg.packet_size % s->params->chunk_size, bandwidth); diff --git a/src/networks/model-net/dragonfly-dally.C b/src/networks/model-net/dragonfly-dally.C index 43035254..0154b2e8 100644 --- a/src/networks/model-net/dragonfly-dally.C +++ b/src/networks/model-net/dragonfly-dally.C @@ -1,19 +1,17 @@ /* - * Copyright (C) 2013 University of Chicago. - * See COPYRIGHT notice in top-level directory. + * Copyright (C) 2019, UChicago Argonne and co. + * See LICENSE in top-level directory + * + * Originally written by Misbah Mubarak + * Updated by Neil McGlohon * * A 1D specific dragonfly custom model - diverged from dragonfly-custom.C * Differs from dragonfly.C in that it allows for the custom features typically found in * dragonfly-custom.C. * - * This is not intended to be a long term solution, but enough changes had been made that merging - * into dragonfly-custom.C wasn't feasible at the time. And we needed to have this work introduced - * to the repo. - * - * DO NOT USE THIS MODEL AS A STARTING POINT FOR NEW MODELS - * It has lots of "fossils" leftover from dragonfly custom that depending on how its used, are bad. - * The orthogonally new changes will hopefully be merged into dragonfly custom. But I'm not familiar - * enough with either dragonfly custom or this model in order to confidently make the merge safe for both. 
+ * This was not intended to be a long term solution, but enough changes had been made that merging + * into dragonfly-custom.C wasn't feasible at the time of creation. Today, there are enough differences + * in the two models that there is currently no plan to re-merge the two. */ #include @@ -41,7 +39,6 @@ #define DUMP_CONNECTIONS 0 #define PRINT_CONFIG 1 -#define CREDIT_SIZE 8 #define DFLY_HASH_TABLE_SIZE 4999 // debugging parameters #define BW_MONITOR 1 @@ -52,7 +49,7 @@ #define TRACK_MSG -1 #define DEBUG 0 #define MAX_STATS 65536 -#define SHOW_ADAP_STATS 1\ +#define SHOW_ADAP_STATS 1 // maximum number of characters allowed to represent the routing algorithm as a string #define MAX_ROUTING_CHARS 32 @@ -60,8 +57,6 @@ //Routing Defines //NONMIN_INCLUDE_SOURCE_DEST: Do we allow source and destination groups to be viable choces for indirect group (i.e. do we allow nonminimal routing to sometimes be minimal?) #define NONMIN_INCLUDE_SOURCE_DEST 0 - - //End routing defines #define LP_CONFIG_NM_TERM (model_net_lp_config_names[DRAGONFLY_DALLY]) @@ -69,7 +64,6 @@ #define LP_CONFIG_NM_ROUT (model_net_lp_config_names[DRAGONFLY_DALLY_ROUTER]) #define LP_METHOD_NM_ROUT (model_net_method_names[DRAGONFLY_DALLY_ROUTER]) -static int debug_cnt = 0; static int max_lvc_src_g = 1; static int max_lvc_intm_g = 3; static int min_gvc_src_g = 0; @@ -96,10 +90,25 @@ static vector< vector< vector > > connectionList; static vector< ConnectionManager > connManagerList; +/* Note: Dragonfly Dally doesn't distinguish intra links into colored "types". + So the type field here is ignored. This will be changed at some point in the + future but we want to provide a script that will allow for easy converting of + intra-group files to the new format of (src, dest) instead of the current + (src, dest, type). If we changed this here now, then all pre-existing + intra-group files will break.
+ + IntraGroupLink is a struct used to unpack binary data regarding intra group + connections from the supplied intra-group file. This struct should not be + utilized anywhere else in the model. +*/ struct IntraGroupLink { int src, dest, type; }; +/* InterGroupLink is a struct used to unpack binary data regarding inter group + connections from the supplied inter-group file. This struct should not be + utilized anywhere else in the model. +*/ struct InterGroupLink { int src, dest; }; @@ -111,8 +120,6 @@ extern cortex_topology dragonfly_dally_cortex_topology; } #endif -static int debug_slot_count = 0; -static long term_ecount, router_ecount, term_rev_ecount, router_rev_ecount; static long packet_gen = 0, packet_fin = 0; static double maxd(double a, double b) { return a < b ? b : a; } @@ -138,10 +145,6 @@ static int router_magic_num = 0; /* terminal magic number */ static int terminal_magic_num = 0; -/* Hops within a group */ -static int num_intra_nonmin_hops = 4; -static int num_intra_min_hops = 2; - static FILE * dragonfly_rtr_bw_log = NULL; //static FILE * dragonfly_term_bw_log = NULL; @@ -203,7 +206,10 @@ struct dragonfly_param double cn_delay; double local_delay; double global_delay; - double credit_delay; + int credit_size; + double local_credit_delay; + double global_credit_delay; + double cn_credit_delay; double router_delay; int max_hops_notify; //maximum number of hops allowed before notifying via printout @@ -250,92 +256,6 @@ struct dfly_qhash_entry struct qhash_head hash_link; }; -/* handles terminal and router events like packet generate/send/receive/buffer */ -typedef struct terminal_state terminal_state; -typedef struct router_state router_state; - -/* dragonfly compute node data structure */ -struct terminal_state -{ - uint64_t packet_counter; - - int packet_gen; - int packet_fin; - - int total_gen_size; - - // Dragonfly specific parameters - unsigned int router_id; - unsigned int terminal_id; - - int* vc_occupancy; // NUM_VC - tw_stime 
terminal_available_time; - terminal_dally_message_list **terminal_msgs; - terminal_dally_message_list **terminal_msgs_tail; - int in_send_loop; - struct mn_stats dragonfly_stats_array[CATEGORY_MAX]; - - int * qos_status; - int * qos_data; - - int last_qos_lvl; - int is_monitoring_bw; - - struct rc_stack * st; - int issueIdle; - int* terminal_length; - - const char * anno; - const dragonfly_param *params; - - struct qhash_table *rank_tbl; - uint64_t rank_tbl_pop; - - tw_stime total_time; - uint64_t total_msg_size; - double total_hops; - long finished_msgs; - long finished_chunks; - long finished_packets; - - tw_stime last_buf_full; - tw_stime busy_time; - - unsigned long* stalled_chunks; //Counter for when a packet cannot be immediately routed - - tw_stime max_latency; - tw_stime min_latency; - - char output_buf[4096]; - char output_buf2[4096]; - /* For LP suspend functionality */ - int error_ct; - - /* For sampling */ - long fin_chunks_sample; - long data_size_sample; - double fin_hops_sample; - tw_stime fin_chunks_time; - tw_stime busy_time_sample; - - char sample_buf[4096]; - struct dfly_cn_sample * sample_stat; - int op_arr_size; - int max_arr_size; - - /* for logging forward and reverse events */ - long fwd_events; - long rev_events; - - /* following used for ROSS model-level stats collection */ - long fin_chunks_ross_sample; - long data_size_ross_sample; - long fin_hops_ross_sample; - tw_stime fin_chunks_time_ross_sample; - tw_stime busy_time_ross_sample; - struct dfly_cn_sample ross_sample; -}; - typedef enum qos_priority { Q_HIGH =0, @@ -358,6 +278,7 @@ typedef enum conn_minimality_t C_NONMIN } conn_minimality_t; +// See implementations in dfdally_score_connection() typedef enum route_scoring_metric_t { ALPHA = 1, //Count queue lengths and pending messages for a port @@ -366,7 +287,7 @@ typedef enum route_scoring_metric_t DELTA //count queue lengths and pending messages for a port, biased 2x against nonminimal conns } route_scoring_metric_t; -/* terminal 
event type (1-4) */ +/* Enumeration of types of events sent between model LPs */ typedef enum event_t { T_GENERATE=1, @@ -387,12 +308,15 @@ enum last_hop GLOBAL=1, LOCAL, TERMINAL, - ROOT }; -/* three forms of routing algorithms available, adaptive routing is not - * accurate and fully functional in the current version as the formulas - * for detecting load on global channels are not very accurate */ +/* Routing Algorithms Implemented: + Minimal - Guarantees shortest path between terminals, next stop is polled randomly from set of legal next stops + Non-Minimal - Valiant routing, picks random group, routes there first, then routes to destination. Next stop is polled randomly from set of legal next stops + Adaptive - UGAL, not yet implemented TODO: Implement + Prog-adaptive - PAR + Prog-adaptive Legacy - Old implementation of PAR (use at own risk) +*/ enum ROUTING_ALGO { MINIMAL = 1, @@ -454,6 +378,90 @@ static bool isRoutingNonminimalExplicit(int alg) return false; } +/* handles terminal and router events like packet generate/send/receive/buffer */ +typedef struct terminal_state terminal_state; +typedef struct router_state router_state; + +/* dragonfly compute node data structure */ +struct terminal_state +{ + uint64_t packet_counter; + + int packet_gen; + int packet_fin; + + int total_gen_size; + + // Dragonfly specific parameters + unsigned int router_id; + unsigned int terminal_id; + + int* vc_occupancy; // NUM_VC + tw_stime terminal_available_time; + terminal_dally_message_list **terminal_msgs; + terminal_dally_message_list **terminal_msgs_tail; + int in_send_loop; + struct mn_stats dragonfly_stats_array[CATEGORY_MAX]; + + int * qos_status; + int * qos_data; + + int last_qos_lvl; + int is_monitoring_bw; + + struct rc_stack * st; + int issueIdle; + int* terminal_length; + + const char * anno; + const dragonfly_param *params; + + struct qhash_table *rank_tbl; + uint64_t rank_tbl_pop; + + tw_stime total_time; + uint64_t total_msg_size; + double total_hops; + long
finished_msgs; + long finished_chunks; + long finished_packets; + + tw_stime last_buf_full; + tw_stime busy_time; + + unsigned long stalled_chunks; //Counter for when a packet cannot be immediately routed due to full VC + + tw_stime max_latency; + tw_stime min_latency; + + char output_buf[4096]; + char output_buf2[4096]; + + /* For sampling */ + long fin_chunks_sample; + long data_size_sample; + double fin_hops_sample; + tw_stime fin_chunks_time; + tw_stime busy_time_sample; + + char sample_buf[4096]; + struct dfly_cn_sample * sample_stat; + int op_arr_size; + int max_arr_size; + + /* for logging forward and reverse events */ + long fwd_events; + long rev_events; + + /* following used for ROSS model-level stats collection */ + long fin_chunks_ross_sample; + long data_size_ross_sample; + long fin_hops_ross_sample; + tw_stime fin_chunks_time_ross_sample; + tw_stime busy_time_ross_sample; + struct dfly_cn_sample ross_sample; +}; + struct router_state { unsigned int router_id; @@ -466,13 +474,12 @@ struct router_state ConnectionManager *connMan; //manages and organizes connections from this router tw_stime* next_output_available_time; - tw_stime* cur_hist_start_time; tw_stime* last_buf_full; tw_stime* busy_time; tw_stime* busy_time_sample; - unsigned long* stalled_chunks; //Counter for when a packet is put into queued messages instead of routing + unsigned long* stalled_chunks; //Counter for when a packet is put into queued messages instead of routing due to full VC terminal_dally_message_list ***pending_msgs; terminal_dally_message_list ***pending_msgs_tail; @@ -493,9 +500,6 @@ struct router_state const char * anno; const dragonfly_param *params; - - int* prev_hist_num; - int* cur_hist_num; char output_buf[4096]; @@ -510,6 +514,8 @@ struct router_state struct dfly_router_sample ross_rsample; }; + + /* had to pull some of the ROSS model stats collection stuff up here */ void custom_dally_dragonfly_event_collect(terminal_dally_message *m, tw_lp *lp, char *buffer, int 
*collect_flag); void custom_dally_dragonfly_model_stat_collect(terminal_state *s, tw_lp *lp, char *buffer); @@ -530,1400 +536,1299 @@ st_model_types custom_dally_dragonfly_model_types[] = { {(ev_trace_f) custom_dally_dragonfly_event_collect, sizeof(int), (model_stat_f) custom_dally_dfly_router_model_stat_collect, - 0, //updated in router_dally_setup() since it's based on the radix + 0, //updated in router_dally_init() since it's based on the radix (sample_event_f) ross_dally_dragonfly_rsample_fn, (sample_revent_f) ross_dally_dragonfly_rsample_rc_fn, - 0 } , //updated in router_dally_setup() since it's based on the radix + 0 } , //updated in router_dally_init() since it's based on the radix {NULL, 0, NULL, 0, NULL, NULL, 0} }; /* End of ROSS model stats collection */ -static short routing = MINIMAL; -static short scoring = ALPHA; +/* For ROSS event tracing */ +void custom_dally_dragonfly_event_collect(terminal_dally_message *m, tw_lp *lp, char *buffer, int *collect_flag) +{ + (void)lp; + (void)collect_flag; -static Connection dfdally_prog_adaptive_legacy_routing(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp, int fdest_router_id); + int type = (int) m->type; + memcpy(buffer, &type, sizeof(type)); +} -static tw_stime dragonfly_total_time = 0; -static tw_stime dragonfly_max_latency = 0; +void custom_dally_dragonfly_model_stat_collect(terminal_state *s, tw_lp *lp, char *buffer) +{ + (void)lp; + int index = 0; + tw_lpid id = 0; + long tmp = 0; + tw_stime tmp2 = 0; + + id = s->terminal_id; + memcpy(&buffer[index], &id, sizeof(id)); + index += sizeof(id); -static long long total_hops = 0; -static long long N_finished_packets = 0; -static long long total_msg_sz = 0; -static long long N_finished_msgs = 0; -static long long N_finished_chunks = 0; + tmp = s->fin_chunks_ross_sample; + memcpy(&buffer[index], &tmp, sizeof(tmp)); + index += sizeof(tmp); + s->fin_chunks_ross_sample = 0; -/* convert ns to seconds */ -static tw_stime ns_to_s(tw_stime ns) -{ - 
return(ns / (1000.0 * 1000.0 * 1000.0)); -} + tmp = s->data_size_ross_sample; + memcpy(&buffer[index], &tmp, sizeof(tmp)); + index += sizeof(tmp); + s->data_size_ross_sample = 0; -static double bytes_to_gigabytes(double bytes) -{ - return bytes / (double) (1024 * 1024 * 1024); -} -static int dragonfly_rank_hash_compare( - void *key, struct qhash_head *link) -{ - struct dfly_hash_key *message_key = (struct dfly_hash_key *)key; - struct dfly_qhash_entry *tmp = NULL; + tmp = s->fin_hops_ross_sample; + memcpy(&buffer[index], &tmp, sizeof(tmp)); + index += sizeof(tmp); + s->fin_hops_ross_sample = 0; - tmp = qhash_entry(link, struct dfly_qhash_entry, hash_link); - - if (tmp->key.message_id == message_key->message_id && tmp->key.sender_id == message_key->sender_id) - return 1; + tmp2 = s->fin_chunks_time_ross_sample; + memcpy(&buffer[index], &tmp2, sizeof(tmp2)); + index += sizeof(tmp2); + s->fin_chunks_time_ross_sample = 0; - return 0; + tmp2 = s->busy_time_ross_sample; + memcpy(&buffer[index], &tmp2, sizeof(tmp2)); + index += sizeof(tmp2); + s->busy_time_ross_sample = 0; + + return; } -static int dragonfly_hash_func(void *k, int table_size) + +void custom_dally_dfly_router_model_stat_collect(router_state *s, tw_lp *lp, char *buffer) { - struct dfly_hash_key *tmp = (struct dfly_hash_key *)k; - uint32_t pc = 0, pb = 0; - bj_hashlittle2(tmp, sizeof(*tmp), &pc, &pb); - return (int)(pc % (table_size - 1)); - /*uint64_t key = (~tmp->message_id) + (tmp->message_id << 18); - key = key * 21; - key = ~key ^ (tmp->sender_id >> 4); - key = key * tmp->sender_id; - return (int)(key & (table_size - 1));*/ -} + (void)lp; -/* convert GiB/s and bytes to ns */ -static tw_stime bytes_to_ns(uint64_t bytes, double GB_p_s) -{ - tw_stime time; + const dragonfly_param * p = s->params; + int i, index = 0; - /* bytes to GB */ - time = ((double)bytes)/(1024.0*1024.0*1024.0); - /* GiB to s */ - time = time / GB_p_s; - /* s to ns */ - time = time * 1000.0 * 1000.0 * 1000.0; + tw_lpid id = 0; + 
tw_stime tmp = 0; + int64_t tmp2 = 0; - return(time); -} + id = s->router_id; + memcpy(&buffer[index], &id, sizeof(id)); + index += sizeof(id); -/* returns the dragonfly message size */ -int dragonfly_dally_get_msg_sz(void) -{ - return sizeof(terminal_dally_message); -} + for(i = 0; i < p->radix; i++) + { + tmp = s->busy_time_ross_sample[i]; + memcpy(&buffer[index], &tmp, sizeof(tmp)); + index += sizeof(tmp); + s->busy_time_ross_sample[i] = 0; -static void free_tmp(void * ptr) -{ - struct dfly_qhash_entry * dfly = (dfly_qhash_entry *)ptr; - if(dfly->remote_event_data) - free(dfly->remote_event_data); - - if(dfly) - free(dfly); + tmp2 = s->link_traffic_ross_sample[i]; + memcpy(&buffer[index], &tmp2, sizeof(tmp2)); + index += sizeof(tmp2); + s->link_traffic_ross_sample[i] = 0; + } + return; } -int get_vcg_from_category(terminal_dally_message * msg) -{ - if(strcmp(msg->category, "high") == 0) - return Q_HIGH; - else if(strcmp(msg->category, "medium") == 0) - return Q_MEDIUM; - else - tw_error(TW_LOC, "\n priority needs to be specified with qos_levels>1 %d", msg->category); -} -static void append_to_terminal_dally_message_list( - terminal_dally_message_list ** thisq, - terminal_dally_message_list ** thistail, - int index, - terminal_dally_message_list *msg) +static const st_model_types *custom_dally_dragonfly_get_model_types(void) { -// printf("\n msg id %d ", msg->msg.packet_ID); - if (thisq[index] == NULL) { - thisq[index] = msg; - } - else { - assert(thistail[index] != NULL); - thistail[index]->next = msg; - msg->prev = thistail[index]; - } - thistail[index] = msg; -// printf("\n done adding %d ", msg->msg.packet_ID); + return(&custom_dally_dragonfly_model_types[0]); } -static void prepend_to_terminal_dally_message_list( - terminal_dally_message_list ** thisq, - terminal_dally_message_list ** thistail, - int index, - terminal_dally_message_list *msg) +static const st_model_types *custom_dally_dfly_router_get_model_types(void) { - if (thisq[index] == NULL) { - 
thistail[index] = msg; - } - else { - thisq[index]->prev = msg; - msg->next = thisq[index]; - } - thisq[index] = msg; + return(&custom_dally_dragonfly_model_types[1]); } -static terminal_dally_message_list* return_head( - terminal_dally_message_list ** thisq, - terminal_dally_message_list ** thistail, - int index) +static void custom_dally_dragonfly_register_model_types(st_model_types *base_type) { - terminal_dally_message_list *head = thisq[index]; - if (head != NULL) { - thisq[index] = head->next; - if(head->next != NULL) { - head->next->prev = NULL; - head->next = NULL; - } - else { - thistail[index] = NULL; - } - } - return head; + st_model_type_register(LP_CONFIG_NM_TERM, base_type); } -static terminal_dally_message_list* return_tail( - terminal_dally_message_list ** thisq, - terminal_dally_message_list ** thistail, - int index) +static void custom_dally_router_register_model_types(st_model_types *base_type) { - terminal_dally_message_list *tail = thistail[index]; - assert(tail); - if (tail->prev != NULL) { - tail->prev->next = NULL; - thistail[index] = tail->prev; - tail->prev = NULL; - } - else { - thistail[index] = NULL; - thisq[index] = NULL; - } - return tail; + st_model_type_register(LP_CONFIG_NM_ROUT, base_type); } -/* TODO: Differentiate between local and global bandwidths. 
*/ -static int get_rtr_bandwidth_consumption(router_state * s, int qos_lvl, int output_port) +/*** END of ROSS event tracing additions */ + +static void ross_dally_dragonfly_rsample_fn(router_state * s, tw_bf * bf, tw_lp * lp, struct dfly_router_sample *sample) { - assert(qos_lvl >= Q_HIGH && qos_lvl <= Q_LOW); - assert(output_port < s->params->intra_grp_radix + s->params->num_global_channels + s->params->num_cn); + (void)lp; + (void)bf; - int bandwidth = s->params->cn_bandwidth; - if (output_port < s->params->intra_grp_radix) - bandwidth = s->params->local_bandwidth; - else if (output_port < s->params->intra_grp_radix + s->params->num_global_channels) - bandwidth = s->params->global_bandwidth; + const dragonfly_param * p = s->params; + int i = 0; - /* conversion into bytes from GiB */ - double max_bw = bandwidth * 1024.0 * 1024.0 * 1024.0; - double max_bw_per_ns = max_bw / (1000.0 * 1000.0 * 1000.0); - double max_bytes_per_win = max_bw_per_ns * bw_reset_window; + sample->router_id = s->router_id; + sample->end_time = tw_now(lp); + sample->fwd_events = s->ross_rsample.fwd_events; + sample->rev_events = s->ross_rsample.rev_events; + sample->busy_time = (tw_stime*)((&sample->rev_events) + 1); + sample->link_traffic_sample = (int64_t*)((&sample->busy_time[0]) + p->radix); - /* bw_consumed would be in Gigabytes per second. 
*/ -// tw_stime reset_window_s = ns_to_s(bw_reset_window); -// double bw_gib = bytes_to_gigabytes(s->qos_data[output_port][qos_lvl]); -// double bw_consumed = ((double)bw_gib / (double)reset_window_s); - int percent_bw = (((double)s->qos_data[output_port][qos_lvl]) / max_bytes_per_win) * 100; -// printf("\n percent bw consumed by qos_lvl %d is %d bytes transferred %d max_bw %lf ", qos_lvl, percent_bw, s->qos_data[output_port][qos_lvl], max_bw_per_ns); - return percent_bw; + for(; i < p->radix; i++) + { + sample->busy_time[i] = s->ross_rsample.busy_time[i]; + sample->link_traffic_sample[i] = s->ross_rsample.link_traffic_sample[i]; + } + /* clear up the current router stats */ + s->ross_rsample.fwd_events = 0; + s->ross_rsample.rev_events = 0; + + for( i = 0; i < p->radix; i++) + { + s->ross_rsample.busy_time[i] = 0; + s->ross_rsample.link_traffic_sample[i] = 0; + } } -void dragonfly_print_params(const dragonfly_param *p) +static void ross_dally_dragonfly_rsample_rc_fn(router_state * s, tw_bf * bf, tw_lp * lp, struct dfly_router_sample *sample) { - int myRank; - MPI_Comm_rank(MPI_COMM_CODES, &myRank); - if (!myRank) { - printf("\n------------------ Dragonfly Dally Parameters ---------\n"); - printf("\tnum_routers = %d\n",p->num_routers); - printf("\tlocal_bandwidth = %.2f\n",p->local_bandwidth); - printf("\tglobal_bandwidth = %.2f\n",p->global_bandwidth); - printf("\tcn_bandwidth = %.2f\n",p->cn_bandwidth); - printf("\tnum_vcs = %d\n",p->num_vcs); - printf("\tnum_qos_levels = %d\n",p->num_qos_levels); - printf("\tlocal_vc_size = %d\n",p->local_vc_size); - printf("\tglobal_vc_size = %d\n",p->global_vc_size); - printf("\tcn_vc_size = %d\n",p->cn_vc_size); - printf("\tchunk_size = %d\n",p->chunk_size); - printf("\tnum_cn = %d\n",p->num_cn); - printf("\tintra_grp_radix = %d\n",p->intra_grp_radix); - printf("\tnum_groups = %d\n",p->num_groups); - printf("\tvirtual radix = %d\n",p->radix); - printf("\ttotal_routers = %d\n",p->total_routers); - printf("\ttotal_terminals = 
%d\n",p->total_terminals); - printf("\tnum_global_channels = %d\n",p->num_global_channels); - printf("\tcn_delay = %.2f\n",p->cn_delay); - printf("\tlocal_delay = %.2f\n",p->local_delay); - printf("\tglobal_delay = %.2f\n",p->global_delay); - printf("\tcredit_delay = %.2f\n",p->credit_delay); - printf("\trouter_delay = %.2f\n",p->router_delay); - printf("\trouting = %s\n",get_routing_alg_chararray(routing)); - printf("\tadaptive_threshold = %d\n",p->adaptive_threshold); - printf("\tmax hops notification = %d\n",p->max_hops_notify); - printf("------------------------------------------------------\n\n"); + (void)lp; + (void)bf; + + const dragonfly_param * p = s->params; + int i =0; + + for(; i < p->radix; i++) + { + s->ross_rsample.busy_time[i] = sample->busy_time[i]; + s->ross_rsample.link_traffic_sample[i] = sample->link_traffic_sample[i]; } + + s->ross_rsample.fwd_events = sample->fwd_events; + s->ross_rsample.rev_events = sample->rev_events; } -static void dragonfly_read_config(const char * anno, dragonfly_param *params) +static void ross_dally_dragonfly_sample_fn(terminal_state * s, tw_bf * bf, tw_lp * lp, struct dfly_cn_sample *sample) { - /*Adding init for router magic number*/ - uint32_t h1 = 0, h2 = 0; - bj_hashlittle2(LP_METHOD_NM_ROUT, strlen(LP_METHOD_NM_ROUT), &h1, &h2); - router_magic_num = h1 + h2; - - bj_hashlittle2(LP_METHOD_NM_TERM, strlen(LP_METHOD_NM_TERM), &h1, &h2); - terminal_magic_num = h1 + h2; + (void)lp; + (void)bf; - // shorthand - dragonfly_param *p = params; - int myRank; - MPI_Comm_rank(MPI_COMM_CODES, &myRank); + sample->terminal_id = s->terminal_id; + sample->fin_chunks_sample = s->ross_sample.fin_chunks_sample; + sample->data_size_sample = s->ross_sample.data_size_sample; + sample->fin_hops_sample = s->ross_sample.fin_hops_sample; + sample->fin_chunks_time = s->ross_sample.fin_chunks_time; + sample->busy_time_sample = s->ross_sample.busy_time_sample; + sample->end_time = tw_now(lp); + sample->fwd_events = s->ross_sample.fwd_events; + 
sample->rev_events = s->ross_sample.rev_events; - int rc = configuration_get_value_int(&config, "PARAMS", "local_vc_size", anno, &p->local_vc_size); - if(rc) { - p->local_vc_size = 1024; - if(!myRank) - fprintf(stderr, "Buffer size of local channels not specified, setting to %d\n", p->local_vc_size); - } + s->ross_sample.fin_chunks_sample = 0; + s->ross_sample.data_size_sample = 0; + s->ross_sample.fin_hops_sample = 0; + s->ross_sample.fwd_events = 0; + s->ross_sample.rev_events = 0; + s->ross_sample.fin_chunks_time = 0; + s->ross_sample.busy_time_sample = 0; +} - rc = configuration_get_value_int(&config, "PARAMS", "num_qos_levels", anno, &p->num_qos_levels); - if(rc) { - p->num_qos_levels = 1; - if(!myRank) - fprintf(stderr, "Number of QOS levels not specified, setting to %d\n", p->num_qos_levels); - } +static void ross_dally_dragonfly_sample_rc_fn(terminal_state * s, tw_bf * bf, tw_lp * lp, struct dfly_cn_sample *sample) +{ + (void)lp; + (void)bf; - char qos_levels_str[MAX_NAME_LENGTH]; - rc = configuration_get_value(&config, "PARAMS", "qos_bandwidth", anno, qos_levels_str, MAX_NAME_LENGTH); - p->qos_bandwidths = (int*)calloc(p->num_qos_levels, sizeof(int)); + s->ross_sample.busy_time_sample = sample->busy_time_sample; + s->ross_sample.fin_chunks_time = sample->fin_chunks_time; + s->ross_sample.fin_hops_sample = sample->fin_hops_sample; + s->ross_sample.data_size_sample = sample->data_size_sample; + s->ross_sample.fin_chunks_sample = sample->fin_chunks_sample; + s->ross_sample.fwd_events = sample->fwd_events; + s->ross_sample.rev_events = sample->rev_events; +} - if(p->num_qos_levels > 1) - { - int total_bw = 0; - char * token; - token = strtok(qos_levels_str, ","); - int i = 0; - while(token != NULL) - { - sscanf(token, "%d", &p->qos_bandwidths[i]); - total_bw += p->qos_bandwidths[i]; - if(p->qos_bandwidths[i] <= 0) - { - tw_error(TW_LOC, "\n Invalid bandwidth levels"); - } - i++; - token = strtok(NULL,","); - } - assert(total_bw <= 100); - } - else - 
p->qos_bandwidths[0] = 100; - rc = configuration_get_value_double(&config, "PARAMS", "max_qos_monitor", anno, &max_qos_monitor); - if(rc) { - if(!myRank) - fprintf(stderr, "Setting max_qos_monitor to %lf\n", max_qos_monitor); - } - rc = configuration_get_value_int(&config, "PARAMS", "adaptive_threshold", anno, &p->adaptive_threshold); - if (rc) { - if(!myRank) - fprintf(stderr, "Adaptive Minimal Routing Threshold not specified: setting to default = 0. (Will consider minimal and nonminimal routes based on scoring metric alone)\n"); - p->adaptive_threshold = 0; - } - - rc = configuration_get_value_int(&config, "PARAMS", "global_vc_size", anno, &p->global_vc_size); - if(rc) { - p->global_vc_size = 2048; - if(!myRank) - fprintf(stderr, "Buffer size of global channels not specified, setting to %d\n", p->global_vc_size); - } +void dragonfly_dally_rsample_init(router_state * s, + tw_lp * lp) +{ + (void)lp; + int i = 0; + const dragonfly_param * p = s->params; + assert(p->radix); - rc = configuration_get_value_int(&config, "PARAMS", "cn_vc_size", anno, &p->cn_vc_size); - if(rc) { - p->cn_vc_size = 1024; - if(!myRank) - fprintf(stderr, "Buffer size of compute node channels not specified, setting to %d\n", p->cn_vc_size); + s->max_arr_size = MAX_STATS; + s->rsamples = (struct dfly_router_sample*)calloc(MAX_STATS, sizeof(struct dfly_router_sample)); + for(; i < s->max_arr_size; i++) + { + s->rsamples[i].busy_time = (tw_stime*)calloc(p->radix, sizeof(tw_stime)); + s->rsamples[i].link_traffic_sample = (int64_t*)calloc(p->radix, sizeof(int64_t)); } +} - rc = configuration_get_value_int(&config, "PARAMS", "chunk_size", anno, &p->chunk_size); - if(rc) { - p->chunk_size = 512; - if(!myRank) - fprintf(stderr, "Chunk size for packets is specified, setting to %d\n", p->chunk_size); - } +void dragonfly_dally_rsample_rc_fn(router_state * s, + tw_bf * bf, + terminal_dally_message * msg, + tw_lp * lp) +{ + (void)bf; + (void)lp; + (void)msg; - rc = configuration_get_value_int(&config, 
"PARAMS", "global_k_picks", anno, &p->global_k_picks); - if(rc) { - p->global_k_picks = 2; - if(!myRank) - fprintf(stderr, "global_k_picks for local adaptive routing not specified, setting to %d\n",p->global_k_picks); - } + s->op_arr_size--; + int cur_indx = s->op_arr_size; + struct dfly_router_sample stat = s->rsamples[cur_indx]; - rc = configuration_get_value_double(&config, "PARAMS", "local_bandwidth", anno, &p->local_bandwidth); - if(rc) { - p->local_bandwidth = 5.25; - if(!myRank) - fprintf(stderr, "Bandwidth of local channels not specified, setting to %lf\n", p->local_bandwidth); - } + const dragonfly_param * p = s->params; + int i =0; - rc = configuration_get_value_double(&config, "PARAMS", "global_bandwidth", anno, &p->global_bandwidth); - if(rc) { - p->global_bandwidth = 4.7; - if(!myRank) - fprintf(stderr, "Bandwidth of global channels not specified, setting to %lf\n", p->global_bandwidth); + for(; i < p->radix; i++) + { + s->busy_time_sample[i] = stat.busy_time[i]; + s->link_traffic_sample[i] = stat.link_traffic_sample[i]; } - rc = configuration_get_value_double(&config, "PARAMS", "cn_bandwidth", anno, &p->cn_bandwidth); - if(rc) { - p->cn_bandwidth = 5.25; - if(!myRank) - fprintf(stderr, "Bandwidth of compute node channels not specified, setting to %lf\n", p->cn_bandwidth); + for( i = 0; i < p->radix; i++) + { + stat.busy_time[i] = 0; + stat.link_traffic_sample[i] = 0; } + s->fwd_events = stat.fwd_events; + s->rev_events = stat.rev_events; +} - rc = configuration_get_value_double(&config, "PARAMS", "router_delay", anno, - &p->router_delay); - if(rc) { - p->router_delay = 100; - } +void dragonfly_dally_rsample_fn(router_state * s, + tw_bf * bf, + terminal_dally_message * msg, + tw_lp * lp) +{ + (void)bf; + (void)lp; + (void)msg; - configuration_get_value(&config, "PARAMS", "cn_sample_file", anno, cn_sample_file, - MAX_NAME_LENGTH); - configuration_get_value(&config, "PARAMS", "rt_sample_file", anno, router_sample_file, - MAX_NAME_LENGTH); - - char 
routing_str[MAX_NAME_LENGTH]; - configuration_get_value(&config, "PARAMS", "routing", anno, routing_str, - MAX_NAME_LENGTH); - if(strcmp(routing_str, "minimal") == 0) - routing = MINIMAL; - else if(strcmp(routing_str, "nonminimal")==0 || - strcmp(routing_str,"non-minimal")==0) - routing = NON_MINIMAL; - else if (strcmp(routing_str, "adaptive") == 0) - routing = ADAPTIVE; - else if (strcmp(routing_str, "prog-adaptive") == 0) - routing = PROG_ADAPTIVE; - else if (strcmp(routing_str, "prog-adaptive-legacy") == 0) - routing = PROG_ADAPTIVE_LEGACY; - else + const dragonfly_param * p = s->params; + + if(s->op_arr_size >= s->max_arr_size) { - if(!myRank) - fprintf(stderr, "No routing protocol specified, setting to minimal routing\n"); - routing = MINIMAL; + struct dfly_router_sample * tmp = (dfly_router_sample *)calloc((MAX_STATS + s->max_arr_size), sizeof(struct dfly_router_sample)); + memcpy(tmp, s->rsamples, s->op_arr_size * sizeof(struct dfly_router_sample)); + free(s->rsamples); + s->rsamples = tmp; + s->max_arr_size += MAX_STATS; } - char scoring_str[MAX_NAME_LENGTH]; - configuration_get_value(&config, "PARAMS", "route_scoring_metric", anno, scoring_str, MAX_NAME_LENGTH); - if (strcmp(scoring_str, "alpha") == 0) { - scoring = ALPHA; - } - else if (strcmp(scoring_str, "beta") == 0) { - scoring = BETA; - } - else if (strcmp(scoring_str, "gamma") == 0) { - tw_error(TW_LOC, "Gamma scoring protocol currently non-functional"); //TODO: Fix gamma scoring protocol - scoring = GAMMA; - } - else if (strcmp(scoring_str, "delta") == 0) { - scoring = DELTA; - } - else { - if(!myRank) - fprintf(stderr, "No route scoring protocol specified, setting to DELTA scoring\n"); - scoring = DELTA; - } + int i = 0; + int cur_indx = s->op_arr_size; - rc = configuration_get_value_int(&config, "PARAMS", "notification_on_hops_greater_than", anno, &p->max_hops_notify); - if (rc) { - if(!myRank) - printf("Maximum hops for notifying not specified, setting to INT MAX\n"); - p->max_hops_notify = 
INT_MAX; + s->rsamples[cur_indx].router_id = s->router_id; + s->rsamples[cur_indx].end_time = tw_now(lp); + s->rsamples[cur_indx].fwd_events = s->fwd_events; + s->rsamples[cur_indx].rev_events = s->rev_events; + + for(; i < p->radix; i++) + { + s->rsamples[cur_indx].busy_time[i] = s->busy_time_sample[i]; + s->rsamples[cur_indx].link_traffic_sample[i] = s->link_traffic_sample[i]; } - p->num_vcs = 4; - - if(p->num_qos_levels > 1) - p->num_vcs = p->num_qos_levels * p->num_vcs; + s->op_arr_size++; - rc = configuration_get_value_int(&config, "PARAMS", "num_groups", anno, &p->num_groups); - if(rc) { - tw_error(TW_LOC, "\nnum_groups not specified, Aborting\n"); - } - - rc = configuration_get_value_int(&config, "PARAMS", "num_routers", anno, &p->num_routers); - if(rc) { - tw_error(TW_LOC, "\nnum_routers not specified, Aborting\n"); - } - - rc = configuration_get_value_int(&config, "PARAMS", "num_cns_per_router", anno, &p->num_cn); - if(rc) { - if(!myRank) - fprintf(stderr,"Number of cns per router not specified, setting to %d\n", p->num_routers/2); - p->num_cn = p->num_routers/2; - } + /* clear up the current router stats */ + s->fwd_events = 0; + s->rev_events = 0; - rc = configuration_get_value_int(&config, "PARAMS", "num_global_channels", anno, &p->num_global_channels); - if(rc) { - if(!myRank) - fprintf(stderr,"Number of global channels per router not specified, setting to 10\n"); - p->num_global_channels = 10; + for( i = 0; i < p->radix; i++) + { + s->busy_time_sample[i] = 0; + s->link_traffic_sample[i] = 0; } - p->intra_grp_radix = p->num_routers -1; //TODO allow for parallel connections - p->radix = p->intra_grp_radix + p->num_global_channels + p->num_cn; - p->total_routers = p->num_groups * p->num_routers; - p->total_terminals = p->total_routers * p->num_cn; - +} - //setup Connection Managers for each router - for(int i = 0; i < p->total_routers; i++) +//TODO redo this +void dragonfly_dally_rsample_fin(router_state * s, + tw_lp * lp) +{ + (void)lp; + const 
dragonfly_param * p = s->params; + + if(s->router_id == 0) { - int src_id_global = i; - int src_id_local = i % p->num_routers; - int src_group = i / p->num_routers; + /* write metadata file */ + char meta_fname[64]; + sprintf(meta_fname, "dragonfly-router-sampling.meta"); - ConnectionManager conman = ConnectionManager(src_id_local, src_id_global, src_group, p->intra_grp_radix, p->num_global_channels, p->num_cn, p->num_routers); - connManagerList.push_back(conman); + FILE * fp = fopen(meta_fname, "w"); + fprintf(fp, "Router sample struct format: \nrouter_id (tw_lpid) \nbusy time for each of the %d links (double) \n" + "link traffic for each of the %d links (int64_t) \nsample end time (double) forward events per sample \nreverse events per sample ", + p->radix, p->radix); + // fprintf(fp, "\n\nOrdering of links \n%d green (router-router same row) channels \n %d black (router-router same column) channels \n %d global (router-router remote group)" + // " channels \n %d terminal channels", p->num_router_cols * p->num_row_chans, p->num_router_rows * p->num_col_chans, p->num_global_channels, p->num_cn); + fclose(fp); } + char rt_fn[MAX_NAME_LENGTH]; + if(strcmp(router_sample_file, "") == 0) + sprintf(rt_fn, "dragonfly-router-sampling-%ld.bin", g_tw_mynode); + else + sprintf(rt_fn, "%s-%ld.bin", router_sample_file, g_tw_mynode); + + int i = 0; - // read intra group connections, store from a router's perspective - // all links to the same router form a vector - char intraFile[MAX_NAME_LENGTH]; - configuration_get_value(&config, "PARAMS", "intra-group-connections", - anno, intraFile, MAX_NAME_LENGTH); - if (strlen(intraFile) <= 0) { - tw_error(TW_LOC, "Intra group connections file not specified. 
Aborting"); - } - FILE *groupFile = fopen(intraFile, "rb"); - if (!groupFile) - tw_error(TW_LOC, "intra-group file not found "); - - if (!myRank) - printf("Reading intra-group connectivity file: %s\n", intraFile); - - IntraGroupLink newLink; - while (fread(&newLink, sizeof(IntraGroupLink), 1, groupFile) != 0 ) { - int src_id_local = newLink.src; - int dest_id_local = newLink.dest; + int size_sample = sizeof(tw_lpid) + p->radix * (sizeof(int64_t) + sizeof(tw_stime)) + sizeof(tw_stime) + 2 * sizeof(long); + FILE * fp = fopen(rt_fn, "a"); + fseek(fp, sample_rtr_bytes_written, SEEK_SET); - for(int i = 0; i < p->total_routers; i++) + for(; i < s->op_arr_size; i++) { - int group_id = i/p->num_routers; - if (i % p->num_routers == src_id_local) - { - int dest_id_gloabl = group_id * p->num_routers + dest_id_local; - connManagerList[i].add_connection(dest_id_gloabl, CONN_LOCAL); - } + fwrite((void*)&(s->rsamples[i].router_id), sizeof(tw_lpid), 1, fp); + fwrite(s->rsamples[i].busy_time, sizeof(tw_stime), p->radix, fp); + fwrite(s->rsamples[i].link_traffic_sample, sizeof(int64_t), p->radix, fp); + fwrite((void*)&(s->rsamples[i].end_time), sizeof(tw_stime), 1, fp); + fwrite((void*)&(s->rsamples[i].fwd_events), sizeof(long), 1, fp); + fwrite((void*)&(s->rsamples[i].rev_events), sizeof(long), 1, fp); } - } - fclose(groupFile); + sample_rtr_bytes_written += (s->op_arr_size * size_sample); + fclose(fp); +} +void dragonfly_dally_sample_init(terminal_state * s, + tw_lp * lp) +{ + (void)lp; + s->fin_chunks_sample = 0; + s->data_size_sample = 0; + s->fin_hops_sample = 0; + s->fin_chunks_time = 0; + s->busy_time_sample = 0; - //terminal assignment - for(int i = 0; i < p->total_terminals; i++) - { - int assigned_router_id = (int) i / p->num_cn; - int assigned_group_id = assigned_router_id / p->num_routers; - connManagerList[assigned_router_id].add_connection(i, CONN_TERMINAL); - } + s->op_arr_size = 0; + s->max_arr_size = MAX_STATS; - // read inter group connections, store from a 
router's perspective - // also create a group level table that tells all the connecting routers - char interFile[MAX_NAME_LENGTH]; - configuration_get_value(&config, "PARAMS", "inter-group-connections", - anno, interFile, MAX_NAME_LENGTH); - if(strlen(interFile) <= 0) { - tw_error(TW_LOC, "Inter group connections file not specified. Aborting"); - } - FILE *systemFile = fopen(interFile, "rb"); - if(!myRank) + s->sample_stat = (dfly_cn_sample *)calloc(MAX_STATS, sizeof(struct dfly_cn_sample)); + +} +void dragonfly_dally_sample_rc_fn(terminal_state * s, + tw_bf * bf, + terminal_dally_message * msg, + tw_lp * lp) +{ + (void)lp; + (void)bf; + (void)msg; + + s->op_arr_size--; + int cur_indx = s->op_arr_size; + struct dfly_cn_sample stat = s->sample_stat[cur_indx]; + s->busy_time_sample = stat.busy_time_sample; + s->fin_chunks_time = stat.fin_chunks_time; + s->fin_hops_sample = stat.fin_hops_sample; + s->data_size_sample = stat.data_size_sample; + s->fin_chunks_sample = stat.fin_chunks_sample; + s->fwd_events = stat.fwd_events; + s->rev_events = stat.rev_events; + + stat.busy_time_sample = 0; + stat.fin_chunks_time = 0; + stat.fin_hops_sample = 0; + stat.data_size_sample = 0; + stat.fin_chunks_sample = 0; + stat.end_time = 0; + stat.terminal_id = 0; + stat.fwd_events = 0; + stat.rev_events = 0; +} + +void dragonfly_dally_sample_fn(terminal_state * s, + tw_bf * bf, + terminal_dally_message * msg, + tw_lp * lp) +{ + (void)lp; + (void)msg; + (void)bf; + + if(s->op_arr_size >= s->max_arr_size) { - printf("Reading inter-group connectivity file: %s\n", interFile); - printf("\n Total routers %d total groups %d ", p->total_routers, p->num_groups); + /* In the worst case, copy array to a new memory location, its very + * expensive operation though */ + struct dfly_cn_sample * tmp = (dfly_cn_sample *)calloc((MAX_STATS + s->max_arr_size), sizeof(struct dfly_cn_sample)); + memcpy(tmp, s->sample_stat, s->op_arr_size * sizeof(struct dfly_cn_sample)); + free(s->sample_stat); + 
s->sample_stat = tmp; + s->max_arr_size += MAX_STATS; } + + int cur_indx = s->op_arr_size; - connectionList.resize(p->num_groups); - for(int g = 0; g < connectionList.size(); g++) { - connectionList[g].resize(p->num_groups); - } + s->sample_stat[cur_indx].terminal_id = s->terminal_id; + s->sample_stat[cur_indx].fin_chunks_sample = s->fin_chunks_sample; + s->sample_stat[cur_indx].data_size_sample = s->data_size_sample; + s->sample_stat[cur_indx].fin_hops_sample = s->fin_hops_sample; + s->sample_stat[cur_indx].fin_chunks_time = s->fin_chunks_time; + s->sample_stat[cur_indx].busy_time_sample = s->busy_time_sample; + s->sample_stat[cur_indx].end_time = tw_now(lp); + s->sample_stat[cur_indx].fwd_events = s->fwd_events; + s->sample_stat[cur_indx].rev_events = s->rev_events; - InterGroupLink newInterLink; - while (fread(&newInterLink, sizeof(InterGroupLink), 1, systemFile) != 0) { - int src_id_global = newInterLink.src; - int src_group_id = src_id_global / p->num_routers; - int dest_id_global = newInterLink.dest; - int dest_group_id = dest_id_global / p->num_routers; + s->op_arr_size++; + s->fin_chunks_sample = 0; + s->data_size_sample = 0; + s->fin_hops_sample = 0; + s->fwd_events = 0; + s->rev_events = 0; + s->fin_chunks_time = 0; + s->busy_time_sample = 0; +} - connManagerList[src_id_global].add_connection(dest_id_global, CONN_GLOBAL); +void dragonfly_dally_sample_fin(terminal_state * s, + tw_lp * lp) +{ + (void)lp; + + if(!g_tw_mynode) + { + + /* write metadata file */ + char meta_fname[64]; + sprintf(meta_fname, "dragonfly-cn-sampling.meta"); - int r; - for (r = 0; r < connectionList[src_group_id][dest_group_id].size(); r++) { - if (connectionList[src_group_id][dest_group_id][r] == newInterLink.src) - break; - } - if (r == connectionList[src_group_id][dest_group_id].size()) { - connectionList[src_group_id][dest_group_id].push_back(newInterLink.src); - } + FILE * fp = fopen(meta_fname, "w"); + fprintf(fp, "Compute node sample format\nterminal_id (tw_lpid) \nfinished 
chunks (long)" + "\ndata size per sample (long) \nfinished hops (double) \ntime to finish chunks (double)" + "\nbusy time (double)\nsample end time(double) \nforward events (long) \nreverse events (long)"); + fclose(fp); } - if (DUMP_CONNECTIONS) - { - if (!myRank) { - for (int i = 0; i < connManagerList.size(); i++) - { - connManagerList[i].print_connections(); - } - } - } + char rt_fn[MAX_NAME_LENGTH]; + if(strncmp(cn_sample_file, "", 10) == 0) + sprintf(rt_fn, "dragonfly-cn-sampling-%ld.bin", g_tw_mynode); + else + sprintf(rt_fn, "%s-%ld.bin", cn_sample_file, g_tw_mynode); - fclose(systemFile); + FILE * fp = fopen(rt_fn, "a"); + fseek(fp, sample_bytes_written, SEEK_SET); + fwrite(s->sample_stat, sizeof(struct dfly_cn_sample), s->op_arr_size, fp); + fclose(fp); - if(!myRank) { - printf("\n Total nodes %d routers %d groups %d routers per group %d radix %d\n", - p->num_cn * p->total_routers, p->total_routers, p->num_groups, - p->num_routers, p->radix); - } + sample_bytes_written += (s->op_arr_size * sizeof(struct dfly_cn_sample)); +} - p->cn_delay = bytes_to_ns(p->chunk_size, p->cn_bandwidth); - p->local_delay = bytes_to_ns(p->chunk_size, p->local_bandwidth); - p->global_delay = bytes_to_ns(p->chunk_size, p->global_bandwidth); - p->credit_delay = bytes_to_ns(CREDIT_SIZE, p->local_bandwidth); //assume 8 bytes packet +static short routing = MINIMAL; +static short scoring = ALPHA; - if (PRINT_CONFIG) - dragonfly_print_params(p); +/*Routing Implementation Declarations*/ +static Connection dfdally_minimal_routing(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp, int fdest_router_id); +static Connection dfdally_nonminimal_routing(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp, int fdest_router_id); +static Connection dfdally_prog_adaptive_routing(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp, int fdest_router_id); +static Connection dfdally_prog_adaptive_legacy_routing(router_state *s, tw_bf *bf, 
terminal_dally_message *msg, tw_lp *lp, int fdest_router_id); - stored_params = p; -} +/*Routing Helper Declarations*/ +static void dfdally_select_intermediate_group(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp, int fdest_router_id); -void dragonfly_dally_configure() { - anno_map = codes_mapping_get_lp_anno_map(LP_CONFIG_NM_TERM); - assert(anno_map); - num_params = anno_map->num_annos + (anno_map->has_unanno_lp > 0); - all_params = (dragonfly_param *)calloc(num_params, sizeof(*all_params)); +static tw_stime dragonfly_total_time = 0; +static tw_stime dragonfly_max_latency = 0; - for (int i = 0; i < anno_map->num_annos; i++) { - const char * anno = anno_map->annotations[i].ptr; - dragonfly_read_config(anno, &all_params[i]); - } - if (anno_map->has_unanno_lp > 0){ - dragonfly_read_config(NULL, &all_params[anno_map->num_annos]); - } -#ifdef ENABLE_CORTEX - model_net_topology = dragonfly_dally_cortex_topology; -#endif + +static long long total_hops = 0; +static long long N_finished_packets = 0; +static long long total_msg_sz = 0; +static long long N_finished_msgs = 0; +static long long N_finished_chunks = 0; + +/* convert ns to seconds */ +static tw_stime ns_to_s(tw_stime ns) +{ + return(ns / (1000.0 * 1000.0 * 1000.0)); } -/* report dragonfly statistics like average and maximum packet latency, average number of hops traversed */ -void dragonfly_dally_report_stats() +static double bytes_to_gigabytes(double bytes) { - long long avg_hops, total_finished_packets, total_finished_chunks; - long long total_finished_msgs, final_msg_sz; - tw_stime avg_time, max_time; - int total_minimal_packets, total_nonmin_packets; - long total_gen, total_fin; - long total_local_packets_sr, total_local_packets_sg, total_remote_packets; + return bytes / (double) (1024 * 1024 * 1024); +} +static int dragonfly_rank_hash_compare( + void *key, struct qhash_head *link) +{ + struct dfly_hash_key *message_key = (struct dfly_hash_key *)key; + struct dfly_qhash_entry *tmp = NULL; 
- MPI_Reduce( &total_hops, &avg_hops, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES); - MPI_Reduce( &N_finished_packets, &total_finished_packets, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES); - MPI_Reduce( &N_finished_msgs, &total_finished_msgs, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES); - MPI_Reduce( &N_finished_chunks, &total_finished_chunks, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES); - MPI_Reduce( &total_msg_sz, &final_msg_sz, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES); - MPI_Reduce( &dragonfly_total_time, &avg_time, 1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_CODES); - MPI_Reduce( &dragonfly_max_latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_CODES); + tmp = qhash_entry(link, struct dfly_qhash_entry, hash_link); - MPI_Reduce( &packet_gen, &total_gen, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_CODES); - MPI_Reduce(&packet_fin, &total_fin, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_CODES); - MPI_Reduce( &num_local_packets_sr, &total_local_packets_sr, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_CODES); - MPI_Reduce( &num_local_packets_sg, &total_local_packets_sg, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_CODES); - MPI_Reduce( &num_remote_packets, &total_remote_packets, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_CODES); - if(routing == ADAPTIVE || routing == PROG_ADAPTIVE || routing == PROG_ADAPTIVE_LEGACY || SHOW_ADAP_STATS) - { - MPI_Reduce(&minimal_count, &total_minimal_packets, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_CODES); - MPI_Reduce(&nonmin_count, &total_nonmin_packets, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_CODES); - } - - /* print statistics */ - if(!g_tw_mynode) - { - if (PRINT_CONFIG) - dragonfly_print_params(stored_params); + if (tmp->key.message_id == message_key->message_id && tmp->key.sender_id == message_key->sender_id) + return 1; - printf("\nAverage number of hops traversed %f average chunk latency %lf us maximum chunk latency %lf us avg message size %lf bytes finished messages %lld finished chunks %lld\n", - (float)avg_hops/total_finished_chunks, (float) avg_time/total_finished_chunks/1000, 
max_time/1000, (float)final_msg_sz/total_finished_msgs, total_finished_msgs, total_finished_chunks); - if(routing == ADAPTIVE || routing == PROG_ADAPTIVE || routing == PROG_ADAPTIVE_LEGACY || SHOW_ADAP_STATS) - printf("\nADAPTIVE ROUTING STATS: %d chunks routed minimally %d chunks routed non-minimally completed packets %lld \n", - total_minimal_packets, total_nonmin_packets, total_finished_chunks); - - printf("\nTotal packets generated %ld finished %ld Locally routed- same router %ld different-router %ld Remote (inter-group) %ld \n", total_gen, total_fin, total_local_packets_sr, total_local_packets_sg, total_remote_packets); - } - return; + return 0; } - -void issue_bw_monitor_event_rc(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) +static int dragonfly_hash_func(void *k, int table_size) { - for(int i = 0 ; i < msg->num_cll; i++) - codes_local_latency_reverse(lp); - - int num_qos_levels = s->params->num_qos_levels; - - if(msg->rc_is_qos_set == 1) - { - for(int i = 0; i < num_qos_levels; i++) - { - s->qos_data[i] = msg->rc_qos_data[i]; - s->qos_status[i] = msg->rc_qos_status[i]; - } - - free(msg->rc_qos_data); - free(msg->rc_qos_status); - msg->rc_is_qos_set = 0; - } - + struct dfly_hash_key *tmp = (struct dfly_hash_key *)k; + uint32_t pc = 0, pb = 0; + bj_hashlittle2(tmp, sizeof(*tmp), &pc, &pb); + return (int)(pc % (table_size - 1)); + /*uint64_t key = (~tmp->message_id) + (tmp->message_id << 18); + key = key * 21; + key = ~key ^ (tmp->sender_id >> 4); + key = key * tmp->sender_id; + return (int)(key & (table_size - 1));*/ } -/* resets the bandwidth numbers recorded so far */ -void issue_bw_monitor_event(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) + +/* convert GiB/s and bytes to ns */ +static tw_stime bytes_to_ns(uint64_t bytes, double GB_p_s) { - - msg->num_cll = 0; - msg->num_rngs = 0; - int num_qos_levels = s->params->num_qos_levels; - - //RC data storage start. 
- //Allocate memory here for these pointers that are stored in the events. FREE THESE IN RC OR IN COMMIT_F - msg->rc_qos_data = (unsigned long long *) calloc(num_qos_levels, sizeof(unsigned long long)); - msg->rc_qos_status = (int *) calloc(num_qos_levels, sizeof(int)); + tw_stime time; - //store qos data and status into the arrays. Pointers to the arrays are stored in events. - for(int i = 0; i < num_qos_levels; i++) - { - msg->rc_qos_data[i] = s->qos_data[i]; - msg->rc_qos_status[i] = s->qos_status[i]; - } - msg->rc_is_qos_set = 1; - //RC data storage end. + /* bytes to GB */ + time = ((double)bytes)/(1024.0*1024.0*1024.0); + /* GiB to s */ + time = time / GB_p_s; + /* s to ns */ + time = time * 1000.0 * 1000.0 * 1000.0; - /* Reset the qos status and bandwidth consumption. */ - for(int i = 0; i < num_qos_levels; i++) - { - s->qos_status[i] = Q_ACTIVE; - s->qos_data[i] = 0; - } + return(time); +} - if(tw_now(lp) > max_qos_monitor) - return; - - msg->num_cll++; - terminal_dally_message * m; - tw_stime bw_ts = bw_reset_window + codes_local_latency(lp); - tw_event * e = model_net_method_event_new(lp->gid, bw_ts, lp, DRAGONFLY_DALLY, - (void**)&m, NULL); - m->type = T_BANDWIDTH; - m->magic = terminal_magic_num; - tw_event_send(e); +/* returns the dragonfly message size */ +int dragonfly_dally_get_msg_sz(void) +{ + return sizeof(terminal_dally_message); } -void issue_rtr_bw_monitor_event_rc(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp) +static void free_tmp(void * ptr) { - int radix = s->params->radix; - int num_qos_levels = s->params->num_qos_levels; + struct dfly_qhash_entry * dfly = (dfly_qhash_entry *)ptr; + if(dfly->remote_event_data) + free(dfly->remote_event_data); + + if(dfly) + free(dfly); +} - for(int i = 0 ; i < msg->num_cll; i++) - codes_local_latency_reverse(lp); +static int dfdally_score_connection(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp, Connection conn, conn_minimality_t c_minimality) +{ + int score = 0; 
+ int port = conn.port; - if(msg->rc_is_qos_set == 1) - { - for(int i = 0; i < radix; i++) - { - for(int j = 0; j < num_qos_levels; j++) + if (port == -1) { + return INT_MAX; + } + + switch (scoring) { + case ALPHA: //considers vc occupancy and queued count only + for(int k=0; k < s->params->num_vcs; k++) { - s->qos_data[i][j] = *(indexer2d(msg->rc_qos_data, i, j, radix, num_qos_levels)); - s->qos_status[i][j] = *(indexer2d(msg->rc_qos_status, i, j, radix, num_qos_levels)); + score += s->vc_occupancy[port][k]; } - } + score += s->queued_count[port]; + break; + case BETA: //considers vc occupancy and queued count multiplied by the number of minimal hops to destination from the potential next stop + tw_error(TW_LOC, "Beta scoring not implemented"); + break; + case GAMMA: //delta scoring but higher is better + tw_error(TW_LOC, "Gamma scoring not implemented"); + break; + case DELTA: //alpha but biased 2:1 toward minimal + for(int k=0; k < s->params->num_vcs; k++) + { + score += s->vc_occupancy[port][k]; + } + score += s->queued_count[port]; - free(msg->rc_qos_data); - free(msg->rc_qos_status); - msg->rc_is_qos_set = 0; + if (c_minimality != C_MIN) + score = score * 2; + break; + default: + tw_error(TW_LOC, "Unsupported Scoring Protocol Error\n"); } + return score; } -void issue_rtr_bw_monitor_event(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp) -{ - msg->num_cll = 0; - msg->num_rngs = 0; - int radix = s->params->radix; - int num_qos_levels = s->params->num_qos_levels; - +//Now returns random selection from tied best connections. 
+static Connection get_absolute_best_connection_from_conns(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp, vector< Connection > conns) +{ + if (conns.size() == 0) { //passed no connections to this but we got to return something - return negative filled conn to force a break if not caught + Connection bad_conn; + bad_conn.src_gid = -1; + bad_conn.port = -1; + return bad_conn; + } + if (conns.size() == 1) { //no need to compare singular connection + return conns[0]; + } - //RC data storage start. - //Allocate memory here for these pointers that are stored in the events. FREE THESE IN RC OR IN COMMIT_F - msg->rc_qos_data = (unsigned long long *) calloc(radix * num_qos_levels, sizeof(unsigned long long)); - msg->rc_qos_status = (int *) calloc(radix * num_qos_levels, sizeof(int)); + int num_to_compare = conns.size(); + int scores[num_to_compare]; + vector < Connection > best_conns; + int best_score = INT_MAX; - //store qos data and status into the arrays. Pointers to the arrays are stored in events. - for(int i = 0; i < radix; i++) + for(int i = 0; i < num_to_compare; i++) { - for(int j = 0; j < num_qos_levels; j++) - { - *(indexer2d(msg->rc_qos_data, i, j, radix, num_qos_levels)) = s->qos_data[i][j]; - *(indexer2d(msg->rc_qos_status, i, j, radix, num_qos_levels)) = s->qos_status[i][j]; + scores[i] = dfdally_score_connection(s, bf, msg, lp, conns[i], C_MIN); + if (scores[i] <= best_score) { + if (scores[i] < best_score) { + best_score = scores[i]; + best_conns.clear(); + best_conns.push_back(conns[i]); + } + else { + best_conns.push_back(conns[i]); + } } } - msg->rc_is_qos_set = 1; - //RC data storage end. + assert(best_conns.size() > 0); + + msg->num_rngs++; + return best_conns[tw_rand_integer(lp->rng, 0, best_conns.size()-1)]; +} - for(int i = 0; i < radix; i++) +// This is not the most efficient way to do things as k approaches the size(conns). +// For low k it's more efficient than doing a full shuffle to sample a few random indices, though. 
+static vector< Connection > dfdally_poll_k_connections(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp, vector< Connection > conns, int k) +{ + vector< Connection > k_conns; + if (conns.size() == 0) { - for(int j = 0; j < num_qos_levels; j++) - { - int bw_consumed = get_rtr_bandwidth_consumption(s, j, i); - - #if DEBUG_QOS == 1 - if(dragonfly_rtr_bw_log != NULL) - { - if(s->qos_data[j][k] > 0) - { - fprintf(dragonfly_rtr_bw_log, "\n %d %f %d %d %d %d %d %f", s->router_id, tw_now(lp), i, j, bw_consumed, s->qos_status[i][j], s->qos_data[i][j], s->busy_time_sample[i]); - } - } - #endif - } + return k_conns; } - /* Reset the qos status and bandwidth consumption. */ - for(int i = 0; i < s->params->radix; i++) + if (conns.size() == 1) { - for(int j = 0; j < num_qos_levels; j++) - { - s->qos_status[i][j] = Q_ACTIVE; - s->qos_data[i][j] = 0; - } - s->busy_time_sample[i] = 0; - s->ross_rsample.busy_time[i] = 0; + k_conns.push_back(conns[0]); + return k_conns; } - if(tw_now(lp) > max_qos_monitor) - return; - - msg->num_cll++; - tw_stime bw_ts = bw_reset_window + codes_local_latency(lp); - terminal_dally_message *m; - tw_event * e = model_net_method_event_new(lp->gid, bw_ts, lp, - DRAGONFLY_DALLY_ROUTER, (void**)&m, NULL); - m->type = R_BANDWIDTH; - m->magic = router_magic_num; - tw_event_send(e); -} + if (k == 2) { //This is the default and so let's make a cheaper optimization for it + msg->num_rngs += 2; -void router_dally_commit(router_state * s, - tw_bf * bf, - terminal_dally_message * msg, - tw_lp * lp) -{ - if(msg->type == R_BANDWIDTH) + int rand_sel_1, rand_sel_2, rand_sel_2_offset; + rand_sel_1 = tw_rand_integer(lp->rng, 0, conns.size()-1); + rand_sel_2_offset = tw_rand_integer(lp->rng, 1, conns.size()-1); + rand_sel_2 = (rand_sel_1 + rand_sel_2_offset) % conns.size(); + + k_conns.push_back(conns[rand_sel_1]); + k_conns.push_back(conns[rand_sel_2]); + + return k_conns; + } + // if (k > conns.size()) + // tw_error(TW_LOC, "Attempted to poll k random 
connections but k (%d) is greater than number of connections (%d)",k,conns.size()); + + // create set of unique random k indicies + int last_sel = 0; + set< int > rand_sels; + for (int i = 0; i < k; i++) { - if(msg->rc_is_qos_set == 1) { - free(msg->rc_qos_data); - free(msg->rc_qos_status); - msg->rc_is_qos_set = 0; + int rand_int = tw_rand_integer(lp->rng, 0, (conns.size() - 1) - rand_sels.size()); + int attempt_offset = (last_sel + rand_int) % conns.size(); //get a hopefully unused index - this method of sampling without replacement results in only about + while (rand_sels.count(attempt_offset) != 0) //increment till we find an unused index + { + attempt_offset = (attempt_offset + 1) % conns.size(); } + rand_sels.insert(attempt_offset); + last_sel = attempt_offset; } -} -void terminal_dally_commit(terminal_state * s, - tw_bf * bf, - terminal_dally_message * msg, - tw_lp * lp) -{ - if(msg->type == T_BANDWIDTH) + msg->num_rngs += k; // we only used the rng k times + + // use random k set to create vector of k connections + for(set:: iterator it = rand_sels.begin() ; it != rand_sels.end() ; it++) { - if(msg->rc_is_qos_set == 1) { - free(msg->rc_qos_data); - free(msg->rc_qos_status); - msg->rc_is_qos_set = 0; - } + k_conns.push_back(conns[*it]); } + + return k_conns; } -/* initialize a dragonfly compute node terminal */ -void -terminal_dally_init( terminal_state * s, - tw_lp * lp ) +// note that this is somewhat expensive the larger k is in comparison to the total possible +// consider an optimization to implement an efficient shuffle to poll k random sampling instead +// consider an optimization for the default of 2 +static Connection dfdally_get_best_from_k_connections(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp, vector< Connection > conns, int k) { - s->packet_gen = 0; - s->packet_fin = 0; - s->total_gen_size = 0; - s->is_monitoring_bw = 0; + vector< Connection > k_conns = dfdally_poll_k_connections(s, bf, msg, lp, conns, k); + return 
get_absolute_best_connection_from_conns(s, bf, msg, lp, k_conns); +} - int i; - char anno[MAX_NAME_LENGTH]; +static void append_to_terminal_dally_message_list( + terminal_dally_message_list ** thisq, + terminal_dally_message_list ** thistail, + int index, + terminal_dally_message_list *msg) +{ +// printf("\n msg id %d ", msg->msg.packet_ID); + if (thisq[index] == NULL) { + thisq[index] = msg; + } + else { + assert(thistail[index] != NULL); + thistail[index]->next = msg; + msg->prev = thistail[index]; + } + thistail[index] = msg; +// printf("\n done adding %d ", msg->msg.packet_ID); +} - // Assign the global router ID - // TODO: be annotation-aware - codes_mapping_get_lp_info(lp->gid, lp_group_name, &mapping_grp_id, NULL, - &mapping_type_id, anno, &mapping_rep_id, &mapping_offset); - if (anno[0] == '\0'){ - s->anno = NULL; - s->params = &all_params[num_params-1]; - } - else{ - s->anno = strdup(anno); - int id = configuration_get_annotation_index(anno, anno_map); - s->params = &all_params[id]; +static void prepend_to_terminal_dally_message_list( + terminal_dally_message_list ** thisq, + terminal_dally_message_list ** thistail, + int index, + terminal_dally_message_list *msg) +{ + if (thisq[index] == NULL) { + thistail[index] = msg; + } + else { + thisq[index]->prev = msg; + msg->next = thisq[index]; + } + thisq[index] = msg; +} + +static terminal_dally_message_list* return_head( + terminal_dally_message_list ** thisq, + terminal_dally_message_list ** thistail, + int index) +{ + terminal_dally_message_list *head = thisq[index]; + if (head != NULL) { + thisq[index] = head->next; + if(head->next != NULL) { + head->next->prev = NULL; + head->next = NULL; + } + else { + thistail[index] = NULL; + } } + return head; +} - int num_qos_levels = s->params->num_qos_levels; - int num_lps = codes_mapping_get_lp_count(lp_group_name, 1, LP_CONFIG_NM_TERM, - s->anno, 0); +static terminal_dally_message_list* return_tail( + terminal_dally_message_list ** thisq, + 
terminal_dally_message_list ** thistail, + int index) +{ + terminal_dally_message_list *tail = thistail[index]; + assert(tail); + if (tail->prev != NULL) { + tail->prev->next = NULL; + thistail[index] = tail->prev; + tail->prev = NULL; + } + else { + thistail[index] = NULL; + thisq[index] = NULL; + } + return tail; +} - s->terminal_id = codes_mapping_get_lp_relative_id(lp->gid, 0, 0); - s->router_id=(int)s->terminal_id / (s->params->num_cn); - s->terminal_available_time = 0.0; - s->packet_counter = 0; - s->min_latency = INT_MAX; - s->max_latency = 0; +void dragonfly_print_params(const dragonfly_param *p, _IO_FILE * st) +{ + if(!st) + st = stdout; + + fprintf(st,"\n------------------ Dragonfly Dally Parameters ---------\n"); + fprintf(st,"\tnum_routers = %d\n",p->num_routers); + fprintf(st,"\tlocal_bandwidth = %.2f\n",p->local_bandwidth); + fprintf(st,"\tglobal_bandwidth = %.2f\n",p->global_bandwidth); + fprintf(st,"\tcn_bandwidth = %.2f\n",p->cn_bandwidth); + fprintf(st,"\tnum_vcs = %d\n",p->num_vcs); + fprintf(st,"\tnum_qos_levels = %d\n",p->num_qos_levels); + fprintf(st,"\tlocal_vc_size = %d\n",p->local_vc_size); + fprintf(st,"\tglobal_vc_size = %d\n",p->global_vc_size); + fprintf(st,"\tcn_vc_size = %d\n",p->cn_vc_size); + fprintf(st,"\tchunk_size = %d\n",p->chunk_size); + fprintf(st,"\tnum_cn = %d\n",p->num_cn); + fprintf(st,"\tintra_grp_radix = %d\n",p->intra_grp_radix); + fprintf(st,"\tnum_groups = %d\n",p->num_groups); + fprintf(st,"\tvirtual radix = %d\n",p->radix); + fprintf(st,"\ttotal_routers = %d\n",p->total_routers); + fprintf(st,"\ttotal_terminals = %d\n",p->total_terminals); + fprintf(st,"\tnum_global_channels = %d\n",p->num_global_channels); + fprintf(st,"\tcn_delay = %.2f\n",p->cn_delay); + fprintf(st,"\tlocal_delay = %.2f\n",p->local_delay); + fprintf(st,"\tglobal_delay = %.2f\n",p->global_delay); + fprintf(st,"\tlocal credit_delay = %.2f\n",p->local_credit_delay); + fprintf(st,"\tglobal credit_delay = %.2f\n",p->global_credit_delay); + 
fprintf(st,"\tcn credit_delay = %.2f\n",p->cn_credit_delay); + fprintf(st,"\trouter_delay = %.2f\n",p->router_delay); + fprintf(st,"\trouting = %s\n",get_routing_alg_chararray(routing)); + fprintf(st,"\tadaptive_threshold = %d\n",p->adaptive_threshold); + fprintf(st,"\tmax hops notification = %d\n",p->max_hops_notify); + fprintf(st,"------------------------------------------------------\n\n"); +} - s->finished_msgs = 0; - s->finished_chunks = 0; - s->finished_packets = 0; - s->total_time = 0.0; - s->total_msg_size = 0; +static void dragonfly_read_config(const char * anno, dragonfly_param *params) +{ + /*Adding init for router magic number*/ + uint32_t h1 = 0, h2 = 0; + bj_hashlittle2(LP_METHOD_NM_ROUT, strlen(LP_METHOD_NM_ROUT), &h1, &h2); + router_magic_num = h1 + h2; + + bj_hashlittle2(LP_METHOD_NM_TERM, strlen(LP_METHOD_NM_TERM), &h1, &h2); + terminal_magic_num = h1 + h2; + + // shorthand + dragonfly_param *p = params; + int myRank; + MPI_Comm_rank(MPI_COMM_CODES, &myRank); - s->busy_time = 0.0; + int rc = configuration_get_value_int(&config, "PARAMS", "local_vc_size", anno, &p->local_vc_size); + if(rc) { + p->local_vc_size = 1024; + if(!myRank) + fprintf(stderr, "Buffer size of local channels not specified, setting to %d\n", p->local_vc_size); + } - s->fwd_events = 0; - s->rev_events = 0; + rc = configuration_get_value_int(&config, "PARAMS", "global_vc_size", anno, &p->global_vc_size); + if(rc) { + p->global_vc_size = 2048; + if(!myRank) + fprintf(stderr, "Buffer size of global channels not specified, setting to %d\n", p->global_vc_size); + } - rc_stack_create(&s->st); - s->vc_occupancy = (int*)calloc(num_qos_levels, sizeof(int)); //1 vc times the number of qos levels - s->last_buf_full = 0.0; + rc = configuration_get_value_int(&config, "PARAMS", "cn_vc_size", anno, &p->cn_vc_size); + if(rc) { + p->cn_vc_size = 1024; + if(!myRank) + fprintf(stderr, "Buffer size of compute node channels not specified, setting to %d\n", p->cn_vc_size); + } - s->terminal_length = 
(int*)calloc(num_qos_levels, sizeof(int)); //1 vc times number of qos levels + rc = configuration_get_value_int(&config, "PARAMS", "chunk_size", anno, &p->chunk_size); + if(rc) { + p->chunk_size = 512; + if(!myRank) + fprintf(stderr, "Chunk size for packets is specified, setting to %d\n", p->chunk_size); + } - /* Whether the virtual channel group is active or over-bw*/ - s->qos_status = (int*)calloc(num_qos_levels, sizeof(int)); - - /* How much data has been transmitted on the virtual channel group within - * the window */ - s->qos_data = (int*)calloc(num_qos_levels, sizeof(int)); - - for(i = 0; i < num_qos_levels; i++) - { - s->qos_data[i] = 0; - s->qos_status[i] = Q_ACTIVE; - s->vc_occupancy[i]=0; + rc = configuration_get_value_double(&config, "PARAMS", "local_bandwidth", anno, &p->local_bandwidth); + if(rc) { + p->local_bandwidth = 5.25; + if(!myRank) + fprintf(stderr, "Bandwidth of local channels not specified, setting to %lf\n", p->local_bandwidth); } - s->last_qos_lvl = 0; - s->rank_tbl = NULL; - s->terminal_msgs = - (terminal_dally_message_list**)calloc(num_qos_levels, sizeof(terminal_dally_message_list*)); - s->terminal_msgs_tail = - (terminal_dally_message_list**)calloc(num_qos_levels, sizeof(terminal_dally_message_list*)); + rc = configuration_get_value_double(&config, "PARAMS", "global_bandwidth", anno, &p->global_bandwidth); + if(rc) { + p->global_bandwidth = 4.7; + if(!myRank) + fprintf(stderr, "Bandwidth of global channels not specified, setting to %lf\n", p->global_bandwidth); + } - for(int i = 0; i < num_qos_levels; i++) - { - s->terminal_msgs[i] = NULL; - s->terminal_msgs_tail[i] = NULL; + rc = configuration_get_value_double(&config, "PARAMS", "cn_bandwidth", anno, &p->cn_bandwidth); + if(rc) { + p->cn_bandwidth = 5.25; + if(!myRank) + fprintf(stderr, "Bandwidth of compute node channels not specified, setting to %lf\n", p->cn_bandwidth); } - s->in_send_loop = 0; - s->issueIdle = 0; - /*if(s->terminal_id == 0) - { - char term_bw_log[64]; - 
sprintf(term_bw_log, "terminal-bw-tracker"); - dragonfly_term_bw_log = fopen(term_bw_log, "w"); - fprintf(dragonfly_term_bw_log, "\n term-id time-stamp port-id busy-time"); - }*/ - return; -} - -/* sets up the router virtual channels, global channels, - * local channels, compute node channels */ -void router_dally_setup(router_state * r, tw_lp * lp) -{ - - char anno[MAX_NAME_LENGTH]; - codes_mapping_get_lp_info(lp->gid, lp_group_name, &mapping_grp_id, NULL, - &mapping_type_id, anno, &mapping_rep_id, &mapping_offset); + rc = configuration_get_value_double(&config, "PARAMS", "router_delay", anno, + &p->router_delay); + if(rc) { + p->router_delay = 100; + } - if (anno[0] == '\0'){ - r->anno = NULL; - r->params = &all_params[num_params-1]; - } else{ - r->anno = strdup(anno); - int id = configuration_get_annotation_index(anno, anno_map); - r->params = &all_params[id]; + rc = configuration_get_value_int(&config, "PARAMS", "num_qos_levels", anno, &p->num_qos_levels); + if(rc) { + p->num_qos_levels = 1; + if(!myRank) + fprintf(stderr, "Number of QOS levels not specified, setting to %d\n", p->num_qos_levels); } - // shorthand - const dragonfly_param *p = r->params; + char qos_levels_str[MAX_NAME_LENGTH]; + rc = configuration_get_value(&config, "PARAMS", "qos_bandwidth", anno, qos_levels_str, MAX_NAME_LENGTH); + p->qos_bandwidths = (int*)calloc(p->num_qos_levels, sizeof(int)); - num_routers_per_mgrp = codes_mapping_get_lp_count (lp_group_name, 1, "modelnet_dragonfly_dally_router", - NULL, 0); - int num_grp_reps = codes_mapping_get_group_reps(lp_group_name); - if(p->total_routers != num_grp_reps * num_routers_per_mgrp) - tw_error(TW_LOC, "\n Config error: num_routers specified %d total routers computed in the network %d " - "does not match with repetitions * dragonfly_router %d ", - p->num_routers, p->total_routers, num_grp_reps * num_routers_per_mgrp); + if(p->num_qos_levels > 1) + { + int total_bw = 0; + char * token; + token = strtok(qos_levels_str, ","); + int i = 0; + 
while(token != NULL) + { + sscanf(token, "%d", &p->qos_bandwidths[i]); + total_bw += p->qos_bandwidths[i]; + if(p->qos_bandwidths[i] <= 0) + { + tw_error(TW_LOC, "\n Invalid bandwidth levels"); + } + i++; + token = strtok(NULL,","); + } + assert(total_bw <= 100); + } + else + p->qos_bandwidths[0] = 100; + rc = configuration_get_value_double(&config, "PARAMS", "max_qos_monitor", anno, &max_qos_monitor); + if(rc) { + if(!myRank) + fprintf(stderr, "Setting max_qos_monitor to %lf\n", max_qos_monitor); + } + rc = configuration_get_value_int(&config, "PARAMS", "adaptive_threshold", anno, &p->adaptive_threshold); + if (rc) { + if(!myRank) + fprintf(stderr, "Adaptive Minimal Routing Threshold not specified: setting to default = 0. (Will consider minimal and nonminimal routes based on scoring metric alone)\n"); + p->adaptive_threshold = 0; + } - r->router_id = codes_mapping_get_lp_relative_id(lp->gid, 0, 0); - r->group_id=r->router_id/p->num_routers; + configuration_get_value(&config, "PARAMS", "cn_sample_file", anno, cn_sample_file, + MAX_NAME_LENGTH); + configuration_get_value(&config, "PARAMS", "rt_sample_file", anno, router_sample_file, + MAX_NAME_LENGTH); - char rtr_bw_log[128]; - sprintf(rtr_bw_log, "router-bw-tracker-%d", g_tw_mynode); - - if(dragonfly_rtr_bw_log == NULL) + char routing_str[MAX_NAME_LENGTH]; + configuration_get_value(&config, "PARAMS", "routing", anno, routing_str, + MAX_NAME_LENGTH); + if(strcmp(routing_str, "minimal") == 0) + routing = MINIMAL; + else if(strcmp(routing_str, "nonminimal")==0 || + strcmp(routing_str,"non-minimal")==0) + routing = NON_MINIMAL; + else if (strcmp(routing_str, "adaptive") == 0) + routing = ADAPTIVE; + else if (strcmp(routing_str, "prog-adaptive") == 0) + routing = PROG_ADAPTIVE; + else if (strcmp(routing_str, "prog-adaptive-legacy") == 0) + routing = PROG_ADAPTIVE_LEGACY; + else { - dragonfly_rtr_bw_log = fopen(rtr_bw_log, "w+"); + if(!myRank) + fprintf(stderr, "No routing protocol specified, setting to minimal 
routing\n"); + routing = MINIMAL; + } - fprintf(dragonfly_rtr_bw_log, "\n router-id time-stamp port-id qos-level bw-consumed qos-status qos-data busy-time"); + rc = configuration_get_value_int(&config, "PARAMS", "global_k_picks", anno, &p->global_k_picks); + if(rc) { + p->global_k_picks = 2; + if(!myRank) + fprintf(stderr, "global_k_picks for global adaptive routing not specified, setting to %d\n",p->global_k_picks); } - //printf("\n Local router id %d global id %d ", r->router_id, lp->gid); - r->is_monitoring_bw = 0; - r->fwd_events = 0; - r->rev_events = 0; - r->ross_rsample.fwd_events = 0; - r->ross_rsample.rev_events = 0; + char scoring_str[MAX_NAME_LENGTH]; + configuration_get_value(&config, "PARAMS", "route_scoring_metric", anno, scoring_str, MAX_NAME_LENGTH); + if (strcmp(scoring_str, "alpha") == 0) { + scoring = ALPHA; + } + else if (strcmp(scoring_str, "beta") == 0) { + scoring = BETA; + } + else if (strcmp(scoring_str, "gamma") == 0) { + tw_error(TW_LOC, "Gamma scoring protocol currently non-functional"); //TODO: Fix gamma scoring protocol + scoring = GAMMA; + } + else if (strcmp(scoring_str, "delta") == 0) { + scoring = DELTA; + } + else { + if(!myRank) + fprintf(stderr, "No route scoring protocol specified, setting to DELTA scoring\n"); + scoring = DELTA; + } + rc = configuration_get_value_int(&config, "PARAMS", "notification_on_hops_greater_than", anno, &p->max_hops_notify); + if (rc) { + if(!myRank) + fprintf(stderr, "Maximum hops for notifying not specified, setting to INT MAX\n"); + p->max_hops_notify = INT_MAX; + } - int num_qos_levels = p->num_qos_levels; + p->num_vcs = 4; + + if(p->num_qos_levels > 1) + p->num_vcs = p->num_qos_levels * p->num_vcs; - r->connMan = &connManagerList[r->router_id]; + rc = configuration_get_value_int(&config, "PARAMS", "num_groups", anno, &p->num_groups); + if(rc) { + tw_error(TW_LOC, "\nnum_groups not specified, Aborting\n"); + } + + rc = configuration_get_value_int(&config, "PARAMS", "num_routers", anno, 
&p->num_routers); + if(rc) { + tw_error(TW_LOC, "\nnum_routers not specified, Aborting\n"); + } + + rc = configuration_get_value_int(&config, "PARAMS", "num_cns_per_router", anno, &p->num_cn); + if(rc) { + if(!myRank) + fprintf(stderr,"Number of cns per router not specified, setting to %d\n", p->num_routers/2); + p->num_cn = p->num_routers/2; + } - r->global_channel = (int*)calloc(p->num_global_channels, sizeof(int)); - r->next_output_available_time = (tw_stime*)calloc(p->radix, sizeof(tw_stime)); - r->cur_hist_start_time = (tw_stime*)calloc(p->radix, sizeof(tw_stime)); - r->link_traffic = (int64_t*)calloc(p->radix, sizeof(int64_t)); - r->link_traffic_sample = (int64_t*)calloc(p->radix, sizeof(int64_t)); - r->cur_hist_num = (int*)calloc(p->radix, sizeof(int)); - r->prev_hist_num = (int*)calloc(p->radix, sizeof(int)); + rc = configuration_get_value_int(&config, "PARAMS", "num_global_channels", anno, &p->num_global_channels); + if(rc) { + if(!myRank) + fprintf(stderr,"Number of global channels per router not specified, setting to 10\n"); + p->num_global_channels = 10; + } + p->intra_grp_radix = p->num_routers -1; //TODO allow for parallel connections + p->radix = p->intra_grp_radix + p->num_global_channels + p->num_cn; + p->total_routers = p->num_groups * p->num_routers; + p->total_terminals = p->total_routers * p->num_cn; + - r->stalled_chunks = (unsigned long*)calloc(p->radix, sizeof(unsigned long)); + //setup Connection Managers for each router + for(int i = 0; i < p->total_routers; i++) + { + int src_id_global = i; + int src_id_local = i % p->num_routers; + int src_group = i / p->num_routers; - r->vc_occupancy = (int**)calloc(p->radix , sizeof(int*)); - r->in_send_loop = (int*)calloc(p->radix, sizeof(int)); - r->qos_data = (int**)calloc(p->radix, sizeof(int*)); - r->last_qos_lvl = (int*)calloc(p->radix, sizeof(int)); - r->qos_status = (int**)calloc(p->radix, sizeof(int*)); - r->pending_msgs = - (terminal_dally_message_list***)calloc((p->radix), 
sizeof(terminal_dally_message_list**)); - r->pending_msgs_tail = - (terminal_dally_message_list***)calloc((p->radix), sizeof(terminal_dally_message_list**)); - r->queued_msgs = - (terminal_dally_message_list***)calloc(p->radix, sizeof(terminal_dally_message_list**)); - r->queued_msgs_tail = - (terminal_dally_message_list***)calloc(p->radix, sizeof(terminal_dally_message_list**)); - r->queued_count = (int*)calloc(p->radix, sizeof(int)); - r->last_buf_full = (tw_stime*)calloc(p->radix, sizeof(tw_stime*)); - r->busy_time = (tw_stime*)calloc(p->radix, sizeof(tw_stime)); - r->busy_time_sample = (tw_stime*)calloc(p->radix, sizeof(tw_stime)); + ConnectionManager conman = ConnectionManager(src_id_local, src_id_global, src_group, p->intra_grp_radix, p->num_global_channels, p->num_cn, p->num_routers); + connManagerList.push_back(conman); + } - /* set up for ROSS stats sampling */ - r->link_traffic_ross_sample = (int64_t*)calloc(p->radix, sizeof(int64_t)); - r->busy_time_ross_sample = (tw_stime*)calloc(p->radix, sizeof(tw_stime)); - if (g_st_model_stats) - lp->model_types->mstat_sz = sizeof(tw_lpid) + (sizeof(int64_t) + sizeof(tw_stime)) * p->radix; - if (g_st_use_analysis_lps && g_st_model_stats) - lp->model_types->sample_struct_sz = sizeof(struct dfly_router_sample) + (sizeof(tw_stime) + sizeof(int64_t)) * p->radix; - r->ross_rsample.busy_time = (tw_stime*)calloc(p->radix, sizeof(tw_stime)); - r->ross_rsample.link_traffic_sample = (int64_t*)calloc(p->radix, sizeof(int64_t)); + // read intra group connections, store from a router's perspective + // all links to the same router form a vector + char intraFile[MAX_NAME_LENGTH]; + configuration_get_value(&config, "PARAMS", "intra-group-connections", + anno, intraFile, MAX_NAME_LENGTH); + if (strlen(intraFile) <= 0) { + tw_error(TW_LOC, "Intra group connections file not specified. 
Aborting"); + } + FILE *groupFile = fopen(intraFile, "rb"); + if (!groupFile) + tw_error(TW_LOC, "intra-group file not found "); - rc_stack_create(&r->st); + if (!myRank) + fprintf(stderr, "Reading intra-group connectivity file: %s\n", intraFile); - for(int i=0; i < p->radix; i++) + IntraGroupLink newLink; + while (fread(&newLink, sizeof(IntraGroupLink), 1, groupFile) != 0 ) { + int src_id_local = newLink.src; + int dest_id_local = newLink.dest; + + for(int i = 0; i < p->total_routers; i++) + { + int group_id = i/p->num_routers; + if (i % p->num_routers == src_id_local) + { + int dest_id_gloabl = group_id * p->num_routers + dest_id_local; + connManagerList[i].add_connection(dest_id_gloabl, CONN_LOCAL); + } + } + } + fclose(groupFile); + + //terminal assignment + for(int i = 0; i < p->total_terminals; i++) { - // Set credit & router occupancy - r->last_buf_full[i] = 0.0; - r->busy_time[i] = 0.0; - r->busy_time_sample[i] = 0.0; - r->next_output_available_time[i]=0; - r->last_qos_lvl[i] = 0; - r->cur_hist_start_time[i] = 0; - r->link_traffic[i]=0; - r->link_traffic_sample[i] = 0; - r->cur_hist_num[i] = 0; - r->prev_hist_num[i] = 0; - r->queued_count[i] = 0; - r->in_send_loop[i] = 0; - r->vc_occupancy[i] = (int*)calloc(p->num_vcs, sizeof(int)); - // printf("\n Number of vcs %d for radix %d ", p->num_vcs, p->radix); - r->pending_msgs[i] = (terminal_dally_message_list**)calloc(p->num_vcs, - sizeof(terminal_dally_message_list*)); - r->pending_msgs_tail[i] = (terminal_dally_message_list**)calloc(p->num_vcs, - sizeof(terminal_dally_message_list*)); - r->queued_msgs[i] = (terminal_dally_message_list**)calloc(p->num_vcs, - sizeof(terminal_dally_message_list*)); - r->queued_msgs_tail[i] = (terminal_dally_message_list**)calloc(p->num_vcs, - sizeof(terminal_dally_message_list*)); - r->qos_status[i] = (int*)calloc(num_qos_levels, sizeof(int)); - r->qos_data[i] = (int*)calloc(num_qos_levels, sizeof(int)); - for(int j = 0; j < num_qos_levels; j++) - { - r->qos_status[i][j] = 
Q_ACTIVE; - r->qos_data[i][j] = 0; - } - for(int j = 0; j < p->num_vcs; j++) - { - r->pending_msgs[i][j] = NULL; - r->pending_msgs_tail[i][j] = NULL; - r->queued_msgs[i][j] = NULL; - r->queued_msgs_tail[i][j] = NULL; - } + int assigned_router_id = (int) i / p->num_cn; + // int assigned_group_id = assigned_router_id / p->num_routers; + connManagerList[assigned_router_id].add_connection(i, CONN_TERMINAL); } - r->connMan->solidify_connections(); - - return; -} + // read inter group connections, store from a router's perspective + // also create a group level table that tells all the connecting routers + char interFile[MAX_NAME_LENGTH]; + configuration_get_value(&config, "PARAMS", "inter-group-connections", + anno, interFile, MAX_NAME_LENGTH); + if(strlen(interFile) <= 0) { + tw_error(TW_LOC, "Inter group connections file not specified. Aborting"); + } + FILE *systemFile = fopen(interFile, "rb"); + if(!myRank) + { + fprintf(stderr, "Reading inter-group connectivity file: %s\n", interFile); + fprintf(stderr, "\n Total routers %d total groups %d ", p->total_routers, p->num_groups); + } + connectionList.resize(p->num_groups); + for(int g = 0; g < connectionList.size(); g++) { + connectionList[g].resize(p->num_groups); + } -/* dragonfly packet event , generates a dragonfly packet on the compute node */ -static tw_stime dragonfly_dally_packet_event( - model_net_request const * req, - uint64_t message_offset, - uint64_t packet_size, - tw_stime offset, - mn_sched_params const * sched_params, - void const * remote_event, - void const * self_event, - tw_lp *sender, - int is_last_pckt) -{ - (void)message_offset; - (void)sched_params; - tw_event * e_new; - tw_stime xfer_to_nic_time; - terminal_dally_message * msg; - char* tmp_ptr; + InterGroupLink newInterLink; + while (fread(&newInterLink, sizeof(InterGroupLink), 1, systemFile) != 0) { + int src_id_global = newInterLink.src; + int src_group_id = src_id_global / p->num_routers; + int dest_id_global = newInterLink.dest; + int 
dest_group_id = dest_id_global / p->num_routers; - xfer_to_nic_time = codes_local_latency(sender); - //e_new = tw_event_new(sender->gid, xfer_to_nic_time+offset, sender); - //msg = tw_event_data(e_new); - e_new = model_net_method_event_new(sender->gid, xfer_to_nic_time+offset, - sender, DRAGONFLY_DALLY, (void**)&msg, (void**)&tmp_ptr); - strcpy(msg->category, req->category); - msg->final_dest_gid = req->final_dest_lp; - msg->total_size = req->msg_size; - msg->sender_lp=req->src_lp; - msg->sender_mn_lp = sender->gid; - msg->packet_size = packet_size; - msg->travel_start_time = tw_now(sender); - msg->remote_event_size_bytes = 0; - msg->local_event_size_bytes = 0; - msg->type = T_GENERATE; - msg->dest_terminal_lpid = req->dest_mn_lp; - msg->dfdally_dest_terminal_id = codes_mapping_get_lp_relative_id(msg->dest_terminal_lpid,0,0); - msg->message_id = req->msg_id; - msg->is_pull = req->is_pull; - msg->pull_size = req->pull_size; - msg->magic = terminal_magic_num; - msg->msg_start_time = req->msg_start_time; + connManagerList[src_id_global].add_connection(dest_id_global, CONN_GLOBAL); - if(is_last_pckt) /* Its the last packet so pass in remote and local event information*/ - { - if(req->remote_event_size > 0) - { - msg->remote_event_size_bytes = req->remote_event_size; - memcpy(tmp_ptr, remote_event, req->remote_event_size); - tmp_ptr += req->remote_event_size; + int r; + for (r = 0; r < connectionList[src_group_id][dest_group_id].size(); r++) { + if (connectionList[src_group_id][dest_group_id][r] == newInterLink.src) + break; } - if(req->self_event_size > 0) - { - msg->local_event_size_bytes = req->self_event_size; - memcpy(tmp_ptr, self_event, req->self_event_size); - tmp_ptr += req->self_event_size; + if (r == connectionList[src_group_id][dest_group_id].size()) { + connectionList[src_group_id][dest_group_id].push_back(newInterLink.src); } - } - //printf("\n dragonfly remote event %d local event %d last packet %d %lf ", msg->remote_event_size_bytes, 
msg->local_event_size_bytes, is_last_pckt, xfer_to_nic_time); - tw_event_send(e_new); - return xfer_to_nic_time; -} - -/* dragonfly packet event reverse handler */ -static void dragonfly_dally_packet_event_rc(tw_lp *sender) -{ - codes_local_latency_reverse(sender); - return; -} - -/*When a packet is sent from the current router and a buffer slot becomes available, a credit is sent back to schedule another packet event*/ -static void router_credit_send(router_state * s, terminal_dally_message * msg, - tw_lp * lp, int sq, short* rng_counter) { - tw_event * buf_e; - tw_stime ts; - terminal_dally_message * buf_msg; - - int dest = 0, type = R_BUFFER; - int is_terminal = 0; + } - const dragonfly_param *p = s->params; - - // Notify sender terminal about available buffer space - if(msg->last_hop == TERMINAL) { - dest = msg->src_terminal_id; - type = T_BUFFER; - is_terminal = 1; - } - else if(msg->last_hop == GLOBAL - || msg->last_hop == LOCAL - || msg->last_hop == ROOT) + if (DUMP_CONNECTIONS) { - dest = msg->intm_lp_id; - } - else - printf("\n Invalid message type"); + if (!myRank) { + for (int i = 0; i < connManagerList.size(); i++) + { + connManagerList[i].print_connections(); + } + } + } - (*rng_counter)++; - ts = g_tw_lookahead + p->credit_delay + tw_rand_unif(lp->rng); + fclose(systemFile); - if (is_terminal) { - buf_e = model_net_method_event_new(dest, ts, lp, DRAGONFLY_DALLY, - (void**)&buf_msg, NULL); - buf_msg->magic = terminal_magic_num; - } - else { - buf_e = model_net_method_event_new(dest, ts, lp, DRAGONFLY_DALLY_ROUTER, - (void**)&buf_msg, NULL); - buf_msg->magic = router_magic_num; - } - - buf_msg->origin_router_id = s->router_id; - if(sq == -1) { - buf_msg->vc_index = msg->vc_index; - buf_msg->output_chan = msg->output_chan; - } else { - buf_msg->vc_index = msg->saved_vc; - buf_msg->output_chan = msg->saved_channel; + if(!myRank) { + fprintf(stderr, "\n Total nodes %d routers %d groups %d routers per group %d radix %d\n\n", + p->num_cn * p->total_routers, 
p->total_routers, p->num_groups, + p->num_routers, p->radix); } - strcpy(buf_msg->category, msg->category); - buf_msg->type = type; - - tw_event_send(buf_e); - return; -} -static void packet_generate_rc(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) -{ - int num_qos_levels = s->params->num_qos_levels; - if(bf->c1) - s->is_monitoring_bw = 0; - - s->total_gen_size -= msg->packet_size; - s->packet_gen--; - packet_gen--; - s->packet_counter--; + rc = configuration_get_value_double(&config, "PARAMS", "cn_delay", anno, &p->cn_delay); + if (rc) { + p->cn_delay = bytes_to_ns(p->chunk_size, p->cn_bandwidth); + if(!myRank) + fprintf(stderr, "cn_delay not specified, using default calculation: %.2f\n", p->cn_delay); + } - if(bf->c2) - num_local_packets_sr--; - if(bf->c3) - num_local_packets_sg--; - if(bf->c4) - num_remote_packets--; + rc = configuration_get_value_double(&config, "PARAMS", "local_delay", anno, &p->local_delay); + if (rc) { + p->local_delay = bytes_to_ns(p->chunk_size, p->local_bandwidth); + if(!myRank) + fprintf(stderr, "local_delay not specified, using default calculation: %.2f\n", p->local_delay); + } + rc = configuration_get_value_double(&config, "PARAMS", "global_delay", anno, &p->global_delay); + if (rc) { + p->global_delay = bytes_to_ns(p->chunk_size, p->global_bandwidth); + if(!myRank) + fprintf(stderr, "global_delay not specified, using default calculation: %.2f\n", p->global_delay); + } - for(int i = 0; i < msg->num_rngs; i++) - tw_rand_reverse_unif(lp->rng); + //CREDIT DELAY CONFIGURATION LOGIC ------------ + rc = configuration_get_value_int(&config, "PARAMS", "credit_size", anno, &p->credit_size); + if (rc) { + p->credit_size = 8; + if(!myRank) + fprintf(stderr, "credit_size not specified, using default: %d\n", p->credit_size); + } - for(int i = 0; i < msg->num_cll; i++) - codes_local_latency_reverse(lp); + double general_credit_delay; + int credit_delay_unset = configuration_get_value_double(&config, "PARAMS", 
"credit_delay", anno, &general_credit_delay); + int local_credit_delay_unset = configuration_get_value_double(&config, "PARAMS", "local_credit_delay", anno, &p->local_credit_delay); + int global_credit_delay_unset = configuration_get_value_double(&config, "PARAMS", "global_credit_delay", anno, &p->global_credit_delay); + int cn_credit_delay_unset = configuration_get_value_double(&config, "PARAMS", "cn_credit_delay", anno, &p->cn_credit_delay); - int num_chunks = msg->packet_size/s->params->chunk_size; - if(msg->packet_size < s->params->chunk_size) - num_chunks++; + int auto_credit_delay_flag; + rc = configuration_get_value_int(&config, "PARAMS", "auto_credit_delay", anno, &auto_credit_delay_flag); + if (rc) { + auto_credit_delay_flag = 0; + } + else { + if(!myRank && auto_credit_delay_flag) + fprintf(stderr, "auto_credit_delay flag enabled. All credit delays will be calculated based on their respective bandwidths\n"); + } - int i; - int vcg = 0; - if(num_qos_levels > 1) - { - vcg = get_vcg_from_category(msg); - assert(vcg == Q_HIGH || vcg == Q_MEDIUM); + //If the user specifies a general "credit_delay" AND any of the more specific credit delays, throw an error to make sure they correct their configuration + if (!credit_delay_unset && !(local_credit_delay_unset || global_credit_delay_unset || cn_credit_delay_unset)) + tw_error(TW_LOC, "\nCannot set both a general credit delay and specific (local/global/cn) credit delays. Check configuration file."); + + //If the user specifies ANY credit delays general or otherwise AND has the auto credit delay flag enabled, throw an error to make sure they correct the conflicting configuration + if ((!credit_delay_unset || !local_credit_delay_unset || !global_credit_delay_unset || !cn_credit_delay_unset) && auto_credit_delay_flag) + tw_error(TW_LOC, "\nCannot set both a credit delay (general or specific) and also enable auto credit delay calculation. 
Check Configuration file."); + + //If the user doesn't specify either general or specific credit delays - calculate credit delay based on local bandwidth. + //This is old legacy behavior that is left in to make sure that the credit delay configurations of old aren't semantically different + //Other possible way to program this would be to make each credit delay be set based on their respective bandwidths but this semantically + //changes the behavior of old configuration files. (although it would be more accurate) + if (credit_delay_unset && local_credit_delay_unset && global_credit_delay_unset && cn_credit_delay_unset && !auto_credit_delay_flag) { + p->local_credit_delay = bytes_to_ns(p->credit_size, p->local_bandwidth); + p->global_credit_delay = p->local_credit_delay; + p->cn_credit_delay = p->local_credit_delay; + if(!myRank) + fprintf(stderr, "no credit_delay specified - all credit delays set to %.2f\n",p->local_credit_delay); + } + //If the user doesn't specify a general credit delay but leaves any of the specific credit delay values unset, then we need to set those (the above conditional handles if none of them had been set) + else if (credit_delay_unset) { + if (local_credit_delay_unset) { + p->local_credit_delay = bytes_to_ns(p->credit_size, p->local_bandwidth); + if(!myRank && !auto_credit_delay_flag) //if the auto credit delay flag is true then we've already printed what we're going to do + fprintf(stderr, "local_credit_delay not specified, using calculation based on local bandwidth: %.2f\n", p->local_credit_delay); + } + if (global_credit_delay_unset) { + p->global_credit_delay = bytes_to_ns(p->credit_size, p->global_bandwidth); + if(!myRank && !auto_credit_delay_flag) + fprintf(stderr, "global_credit_delay not specified, using calculation based on global bandwidth: %.2f\n", p->global_credit_delay); + } + if (cn_credit_delay_unset) { + p->cn_credit_delay = bytes_to_ns(p->credit_size, p->cn_bandwidth); + if(!myRank && !auto_credit_delay_flag) + 
fprintf(stderr, "cn_credit_delay not specified, using calculation based on cn bandwidth: %.2f\n", p->cn_credit_delay); + } } - assert(vcg < num_qos_levels); + //If the user specifies a general credit delay (but didn't specify any specific credit delays) then we set all specific credit delays to the general + else if (!credit_delay_unset) { + p->local_credit_delay = general_credit_delay; + p->global_credit_delay = general_credit_delay; + p->cn_credit_delay = general_credit_delay; + + if(!myRank) + fprintf(stderr, "general credit_delay specified - all credit delays set to %.2f\n",general_credit_delay); + } + //END CREDIT DELAY CONFIGURATION LOGIC ---------------- - for(i = 0; i < num_chunks; i++) { - delete_terminal_dally_message_list(return_tail(s->terminal_msgs, - s->terminal_msgs_tail, vcg)); - s->terminal_length[vcg] -= s->params->chunk_size; + if (PRINT_CONFIG && !myRank) { + dragonfly_print_params(p,stderr); } - if(bf->c5) { - s->in_send_loop = 0; - } - if (bf->c11) { - s->issueIdle = 0; - s->stalled_chunks--; - if(bf->c8) { - s->last_buf_full = msg->saved_busy_time; - } - } - struct mn_stats* stat; - stat = model_net_find_stats(msg->category, s->dragonfly_stats_array); - stat->send_count--; - stat->send_bytes -= msg->packet_size; - stat->send_time -= (1/s->params->cn_bandwidth) * msg->packet_size; + stored_params = p; } -/* generates packet at the current dragonfly compute node */ -static void packet_generate(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) { - - msg->num_rngs = 0; - msg->num_cll = 0; +void dragonfly_dally_configure() { + anno_map = codes_mapping_get_lp_anno_map(LP_CONFIG_NM_TERM); + assert(anno_map); + num_params = anno_map->num_annos + (anno_map->has_unanno_lp > 0); + all_params = (dragonfly_param *)calloc(num_params, sizeof(*all_params)); - packet_gen++; - int num_qos_levels = s->params->num_qos_levels; - - if(num_qos_levels > 1) - { - tw_lpid router_id; - codes_mapping_get_lp_info(lp->gid, lp_group_name, 
&mapping_grp_id, NULL, - &mapping_type_id, NULL, &mapping_rep_id, &mapping_offset); - codes_mapping_get_lp_id(lp_group_name, LP_CONFIG_NM_ROUT, NULL, 0, - s->router_id / num_routers_per_mgrp, s->router_id % num_routers_per_mgrp, &router_id); - if(s->is_monitoring_bw == 0) - { - bf->c1 = 1; - /* Issue an event on both terminal and router to monitor bandwidth */ - msg->num_cll++; - tw_stime bw_ts = bw_reset_window + codes_local_latency(lp); - terminal_dally_message * m; - tw_event * e = model_net_method_event_new(lp->gid, bw_ts, lp, DRAGONFLY_DALLY, - (void**)&m, NULL); - m->type = T_BANDWIDTH; - m->magic = terminal_magic_num; - s->is_monitoring_bw = 1; - tw_event_send(e); - } + for (int i = 0; i < anno_map->num_annos; i++) { + const char * anno = anno_map->annotations[i].ptr; + dragonfly_read_config(anno, &all_params[i]); } - s->packet_gen++; - s->total_gen_size += msg->packet_size; - - tw_stime ts, nic_ts; - - assert(lp->gid != msg->dest_terminal_lpid); - const dragonfly_param *p = s->params; - - int vcg = 0; - if(num_qos_levels > 1) - { - vcg = get_vcg_from_category(msg); - assert(vcg == Q_HIGH || vcg == Q_MEDIUM); + if (anno_map->has_unanno_lp > 0){ + dragonfly_read_config(NULL, &all_params[anno_map->num_annos]); } - assert(vcg < num_qos_levels); - - int total_event_size; - uint64_t num_chunks = msg->packet_size / p->chunk_size; - double cn_delay = s->params->cn_delay; - - if (msg->packet_size < s->params->chunk_size) - num_chunks++; - - if(msg->packet_size < s->params->chunk_size) - cn_delay = bytes_to_ns(msg->packet_size % s->params->chunk_size, s->params->cn_bandwidth); +#ifdef ENABLE_CORTEX + model_net_topology = dragonfly_dally_cortex_topology; +#endif +} - int dest_router_id = codes_mapping_get_lp_relative_id(msg->dest_terminal_lpid, 0, 0) / s->params->num_cn; - int dest_grp_id = dest_router_id / s->params->num_routers; - int src_grp_id = s->router_id / s->params->num_routers; +/* report dragonfly statistics like average and maximum packet latency, average 
number of hops traversed */ +void dragonfly_dally_report_stats() +{ + long long avg_hops, total_finished_packets, total_finished_chunks; + long long total_finished_msgs, final_msg_sz; + tw_stime avg_time, max_time; + int total_minimal_packets, total_nonmin_packets; + long total_gen, total_fin; + long total_local_packets_sr, total_local_packets_sg, total_remote_packets; - if(src_grp_id == dest_grp_id) - { - if(dest_router_id == s->router_id) - { - bf->c2 = 1; - num_local_packets_sr++; - } - else - { - bf->c3 = 1; - num_local_packets_sg++; - } - } - else - { - bf->c4 = 1; - num_remote_packets++; - } - msg->num_rngs++; - nic_ts = g_tw_lookahead + (num_chunks * cn_delay) + tw_rand_unif(lp->rng); + MPI_Reduce( &total_hops, &avg_hops, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce( &N_finished_packets, &total_finished_packets, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce( &N_finished_msgs, &total_finished_msgs, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce( &N_finished_chunks, &total_finished_chunks, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce( &total_msg_sz, &final_msg_sz, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce( &dragonfly_total_time, &avg_time, 1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce( &dragonfly_max_latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_CODES); - msg->packet_ID = s->packet_counter; - s->packet_counter++; - msg->my_N_hop = 0; - msg->my_l_hop = 0; - msg->my_g_hop = 0; - - - for(int i = 0; i < num_chunks; i++) + MPI_Reduce( &packet_gen, &total_gen, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce(&packet_fin, &total_fin, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce( &num_local_packets_sr, &total_local_packets_sr, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce( &num_local_packets_sg, &total_local_packets_sg, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce( &num_remote_packets, &total_remote_packets, 1, MPI_LONG, MPI_SUM, 0, 
MPI_COMM_CODES); + if(routing == ADAPTIVE || routing == PROG_ADAPTIVE || routing == PROG_ADAPTIVE_LEGACY || SHOW_ADAP_STATS) { - terminal_dally_message_list *cur_chunk = (terminal_dally_message_list*)calloc(1, - sizeof(terminal_dally_message_list)); - msg->origin_router_id = s->router_id; - init_terminal_dally_message_list(cur_chunk, msg); - - if(msg->remote_event_size_bytes + msg->local_event_size_bytes > 0) { - cur_chunk->event_data = (char*)calloc(1, - msg->remote_event_size_bytes + msg->local_event_size_bytes); - } - - void * m_data_src = model_net_method_get_edata(DRAGONFLY_DALLY, msg); - if (msg->remote_event_size_bytes){ - memcpy(cur_chunk->event_data, m_data_src, msg->remote_event_size_bytes); - } - if (msg->local_event_size_bytes){ - m_data_src = (char*)m_data_src + msg->remote_event_size_bytes; - memcpy((char*)cur_chunk->event_data + msg->remote_event_size_bytes, - m_data_src, msg->local_event_size_bytes); - } - - cur_chunk->msg.output_chan = vcg; - cur_chunk->msg.chunk_id = i; - cur_chunk->msg.origin_router_id = s->router_id; - append_to_terminal_dally_message_list(s->terminal_msgs, s->terminal_msgs_tail, - vcg, cur_chunk); - s->terminal_length[vcg] += s->params->chunk_size; + MPI_Reduce(&minimal_count, &total_minimal_packets, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce(&nonmin_count, &total_nonmin_packets, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_CODES); } - if(s->terminal_length[vcg] < s->params->cn_vc_size) { - model_net_method_idle_event(nic_ts, 0, lp); - } else { - bf->c11 = 1; - s->issueIdle = 1; - s->stalled_chunks++; + /* print statistics */ + if(!g_tw_mynode) + { + if (PRINT_CONFIG) + dragonfly_print_params(stored_params, NULL); - //this block was missing from when QOS was added - readded 5-21-19 - if(s->last_buf_full == 0.0) - { - bf->c8 = 1; - msg->saved_busy_time = s->last_buf_full; - /* TODO: Assumes a single vc from terminal to router */ - s->last_buf_full = tw_now(lp); - } - } + printf("\nAverage number of hops traversed %f average chunk 
latency %lf us maximum chunk latency %lf us avg message size %lf bytes finished messages %lld finished chunks %lld\n", + (float)avg_hops/total_finished_chunks, (float) avg_time/total_finished_chunks/1000, max_time/1000, (float)final_msg_sz/total_finished_msgs, total_finished_msgs, total_finished_chunks); + if(routing == ADAPTIVE || routing == PROG_ADAPTIVE || routing == PROG_ADAPTIVE_LEGACY || SHOW_ADAP_STATS) + printf("\nADAPTIVE ROUTING STATS: %d chunks routed minimally %d chunks routed non-minimally completed packets %lld \n", + total_minimal_packets, total_nonmin_packets, total_finished_chunks); - if(s->in_send_loop == 0) { - bf->c5 = 1; - msg->num_cll++; - ts = codes_local_latency(lp); - terminal_dally_message *m; - tw_event* e = model_net_method_event_new(lp->gid, ts, lp, DRAGONFLY_DALLY, - (void**)&m, NULL); - m->type = T_SEND; - m->magic = terminal_magic_num; - s->in_send_loop = 1; - tw_event_send(e); + printf("\nTotal packets generated %ld finished %ld Locally routed- same router %ld different-router %ld Remote (inter-group) %ld \n", total_gen, total_fin, total_local_packets_sr, total_local_packets_sg, total_remote_packets); } - - total_event_size = model_net_get_msg_sz(DRAGONFLY_DALLY) + - msg->remote_event_size_bytes + msg->local_event_size_bytes; - mn_stats* stat; - stat = model_net_find_stats(msg->category, s->dragonfly_stats_array); - stat->send_count++; - stat->send_bytes += msg->packet_size; - stat->send_time += (1/p->cn_bandwidth) * msg->packet_size; - if(stat->max_event_size < total_event_size) - stat->max_event_size = total_event_size; - return; } +int get_vcg_from_category(terminal_dally_message * msg) +{ + if(strcmp(msg->category, "high") == 0) + return Q_HIGH; + else if(strcmp(msg->category, "medium") == 0) + return Q_MEDIUM; + else + tw_error(TW_LOC, "\n priority needs to be specified with qos_levels>1 %d", msg->category); +} + static int get_term_bandwidth_consumption(terminal_state * s, int qos_lvl) { assert(qos_lvl >= Q_HIGH && qos_lvl <= 
Q_LOW); @@ -1940,79 +1845,188 @@ static int get_term_bandwidth_consumption(terminal_state * s, int qos_lvl) // printf("\n At terminal %lf max bytes %d percent %d ", max_bytes_per_win, s->qos_data[qos_lvl], percent_bw); return percent_bw; } -static int get_next_router_vcg(router_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) + +/* TODO: Differentiate between local and global bandwidths. */ +static int get_rtr_bandwidth_consumption(router_state * s, int qos_lvl, int output_port) { - int num_qos_levels = s->params->num_qos_levels; + assert(qos_lvl >= Q_HIGH && qos_lvl <= Q_LOW); + assert(output_port < s->params->intra_grp_radix + s->params->num_global_channels + s->params->num_cn); - int vcs_per_qos = s->params->num_vcs / num_qos_levels; - int output_port = msg->vc_index; - int vcg = 0; - int base_limit = 0; - - int chunk_size = s->params->chunk_size; - int bw_consumption[num_qos_levels]; - /* First make sure the bandwidth consumptions are up to date. */ - if(BW_MONITOR == 1) + int bandwidth = s->params->cn_bandwidth; + if (output_port < s->params->intra_grp_radix) + bandwidth = s->params->local_bandwidth; + else if (output_port < s->params->intra_grp_radix + s->params->num_global_channels) + bandwidth = s->params->global_bandwidth; + + /* conversion into bytes from GiB */ + double max_bw = bandwidth * 1024.0 * 1024.0 * 1024.0; + double max_bw_per_ns = max_bw / (1000.0 * 1000.0 * 1000.0); + double max_bytes_per_win = max_bw_per_ns * bw_reset_window; + + /* bw_consumed would be in Gigabytes per second. 
*/ +// tw_stime reset_window_s = ns_to_s(bw_reset_window); +// double bw_gib = bytes_to_gigabytes(s->qos_data[output_port][qos_lvl]); +// double bw_consumed = ((double)bw_gib / (double)reset_window_s); + int percent_bw = (((double)s->qos_data[output_port][qos_lvl]) / max_bytes_per_win) * 100; +// printf("\n percent bw consumed by qos_lvl %d is %d bytes transferred %d max_bw %lf ", qos_lvl, percent_bw, s->qos_data[output_port][qos_lvl], max_bw_per_ns); + return percent_bw; +} + +void issue_bw_monitor_event_rc(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) +{ + for(int i = 0 ; i < msg->num_cll; i++) + codes_local_latency_reverse(lp); + + int num_qos_levels = s->params->num_qos_levels; + + if(msg->rc_is_qos_set == 1) { - for(int k = 0; k < num_qos_levels; k++) + for(int i = 0; i < num_qos_levels; i++) { - if(s->qos_status[output_port][k] != Q_OVERBW) - { - bw_consumption[k] = get_rtr_bandwidth_consumption(s, k, output_port); - if(bw_consumption[k] > s->params->qos_bandwidths[k]) - { - // printf("\n Router %d QoS %d exceeded allowed bandwidth %d ", s->router_id, k, bw_consumption[k]); - if(k == 0) - msg->qos_reset1 = 1; - else if(k == 1) - msg->qos_reset2 = 1; - - s->qos_status[output_port][k] = Q_OVERBW; - } - } + s->qos_data[i] = msg->rc_qos_data[i]; + s->qos_status[i] = msg->rc_qos_status[i]; } - int vc_size = s->params->global_vc_size; - if(output_port < s->params->intra_grp_radix) - vc_size = s->params->local_vc_size; - /* TODO: If none of the vcg is exceeding bandwidth limit then select high - * priority traffic first. */ - for(int i = 0; i < num_qos_levels; i++) + free(msg->rc_qos_data); + free(msg->rc_qos_status); + msg->rc_is_qos_set = 0; + } + +} +/* resets the bandwidth numbers recorded so far */ +void issue_bw_monitor_event(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) +{ + + msg->num_cll = 0; + msg->num_rngs = 0; + int num_qos_levels = s->params->num_qos_levels; + + //RC data storage start. 
+ //Allocate memory here for these pointers that are stored in the events. FREE THESE IN RC OR IN COMMIT_F + msg->rc_qos_data = (unsigned long long *) calloc(num_qos_levels, sizeof(unsigned long long)); + msg->rc_qos_status = (int *) calloc(num_qos_levels, sizeof(int)); + + //store qos data and status into the arrays. Pointers to the arrays are stored in events. + for(int i = 0; i < num_qos_levels; i++) + { + msg->rc_qos_data[i] = s->qos_data[i]; + msg->rc_qos_status[i] = s->qos_status[i]; + } + msg->rc_is_qos_set = 1; + //RC data storage end. + + /* Reset the qos status and bandwidth consumption. */ + for(int i = 0; i < num_qos_levels; i++) + { + s->qos_status[i] = Q_ACTIVE; + s->qos_data[i] = 0; + } + + if(tw_now(lp) > max_qos_monitor) + return; + + msg->num_cll++; + terminal_dally_message * m; + tw_stime bw_ts = bw_reset_window + codes_local_latency(lp); + tw_event * e = model_net_method_event_new(lp->gid, bw_ts, lp, DRAGONFLY_DALLY, + (void**)&m, NULL); + m->type = T_BANDWIDTH; + m->magic = terminal_magic_num; + tw_event_send(e); +} + +void issue_rtr_bw_monitor_event_rc(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp) +{ + int radix = s->params->radix; + int num_qos_levels = s->params->num_qos_levels; + + for(int i = 0 ; i < msg->num_cll; i++) + codes_local_latency_reverse(lp); + + if(msg->rc_is_qos_set == 1) + { + for(int i = 0; i < radix; i++) { - if(s->qos_status[output_port][i] == Q_ACTIVE) + for(int j = 0; j < num_qos_levels; j++) { - int base_limit = i * vcs_per_qos; - for(int k = base_limit; k < base_limit + vcs_per_qos; k ++) - { - if(s->pending_msgs[output_port][k] != NULL) - return k; - } + s->qos_data[i][j] = *(indexer2d(msg->rc_qos_data, i, j, radix, num_qos_levels)); + s->qos_status[i][j] = *(indexer2d(msg->rc_qos_status, i, j, radix, num_qos_levels)); } } + + free(msg->rc_qos_data); + free(msg->rc_qos_status); + msg->rc_is_qos_set = 0; } - - /* All vcgs are exceeding their bandwidth limits*/ - msg->last_saved_qos = 
s->last_qos_lvl[output_port]; - int next_rr_vcg = (s->last_qos_lvl[output_port] + 1) % num_qos_levels; +} +void issue_rtr_bw_monitor_event(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp) +{ + msg->num_cll = 0; + msg->num_rngs = 0; - for(int i = 0; i < num_qos_levels; i++) + int radix = s->params->radix; + int num_qos_levels = s->params->num_qos_levels; + + + //RC data storage start. + //Allocate memory here for these pointers that are stored in the events. FREE THESE IN RC OR IN COMMIT_F + msg->rc_qos_data = (unsigned long long *) calloc(radix * num_qos_levels, sizeof(unsigned long long)); + msg->rc_qos_status = (int *) calloc(radix * num_qos_levels, sizeof(int)); + + //store qos data and status into the arrays. Pointers to the arrays are stored in events. + for(int i = 0; i < radix; i++) { - base_limit = next_rr_vcg * vcs_per_qos; - for(int k = base_limit; k < base_limit + vcs_per_qos; k++) + for(int j = 0; j < num_qos_levels; j++) { - if(s->pending_msgs[output_port][k] != NULL) - { - if(msg->last_saved_qos < 0) - msg->last_saved_qos = s->last_qos_lvl[output_port]; + *(indexer2d(msg->rc_qos_data, i, j, radix, num_qos_levels)) = s->qos_data[i][j]; + *(indexer2d(msg->rc_qos_status, i, j, radix, num_qos_levels)) = s->qos_status[i][j]; + } + } + msg->rc_is_qos_set = 1; + //RC data storage end. - s->last_qos_lvl[output_port] = next_rr_vcg; - return k; + + for(int i = 0; i < radix; i++) + { + for(int j = 0; j < num_qos_levels; j++) + { + int bw_consumed = get_rtr_bandwidth_consumption(s, j, i); + + #if DEBUG_QOS == 1 + if(dragonfly_rtr_bw_log != NULL) + { + if(s->qos_data[j][k] > 0) + { + fprintf(dragonfly_rtr_bw_log, "\n %d %f %d %d %d %d %d %f", s->router_id, tw_now(lp), i, j, bw_consumed, s->qos_status[i][j], s->qos_data[i][j], s->busy_time_sample[i]); + } } + #endif } - next_rr_vcg = (next_rr_vcg + 1) % num_qos_levels; - assert(next_rr_vcg < 2); } - return -1; + + /* Reset the qos status and bandwidth consumption. 
*/ + for(int i = 0; i < s->params->radix; i++) + { + for(int j = 0; j < num_qos_levels; j++) + { + s->qos_status[i][j] = Q_ACTIVE; + s->qos_data[i][j] = 0; + } + s->busy_time_sample[i] = 0; + s->ross_rsample.busy_time[i] = 0; + } + + if(tw_now(lp) > max_qos_monitor) + return; + + msg->num_cll++; + tw_stime bw_ts = bw_reset_window + codes_local_latency(lp); + terminal_dally_message *m; + tw_event * e = model_net_method_event_new(lp->gid, bw_ts, lp, + DRAGONFLY_DALLY_ROUTER, (void**)&m, NULL); + m->type = R_BANDWIDTH; + m->magic = router_magic_num; + tw_event_send(e); } static int get_next_vcg(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) @@ -2080,1563 +2094,1404 @@ static int get_next_vcg(terminal_state * s, tw_bf * bf, terminal_dally_message * return -1; } -static void packet_send_rc(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) +static int get_next_router_vcg(router_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) { int num_qos_levels = s->params->num_qos_levels; - if(msg->qos_reset1) - s->qos_status[0] = Q_ACTIVE; - if(msg->qos_reset2) - s->qos_status[1] = Q_ACTIVE; - - if(msg->last_saved_qos) - s->last_qos_lvl = msg->last_saved_qos; + int vcs_per_qos = s->params->num_vcs / num_qos_levels; + int output_port = msg->vc_index; + int vcg = 0; + int base_limit = 0; + + int chunk_size = s->params->chunk_size; + int bw_consumption[num_qos_levels]; + /* First make sure the bandwidth consumptions are up to date. 
*/ + if(BW_MONITOR == 1) + { + for(int k = 0; k < num_qos_levels; k++) + { + if(s->qos_status[output_port][k] != Q_OVERBW) + { + bw_consumption[k] = get_rtr_bandwidth_consumption(s, k, output_port); + if(bw_consumption[k] > s->params->qos_bandwidths[k]) + { + // printf("\n Router %d QoS %d exceeded allowed bandwidth %d ", s->router_id, k, bw_consumption[k]); + if(k == 0) + msg->qos_reset1 = 1; + else if(k == 1) + msg->qos_reset2 = 1; - if(bf->c1) { - s->in_send_loop = 1; - if(bf->c3) - s->last_buf_full = msg->saved_busy_time; - - return; + s->qos_status[output_port][k] = Q_OVERBW; + } + } + } + int vc_size = s->params->global_vc_size; + if(output_port < s->params->intra_grp_radix) + vc_size = s->params->local_vc_size; + + /* TODO: If none of the vcg is exceeding bandwidth limit then select high + * priority traffic first. */ + for(int i = 0; i < num_qos_levels; i++) + { + if(s->qos_status[output_port][i] == Q_ACTIVE) + { + int base_limit = i * vcs_per_qos; + for(int k = base_limit; k < base_limit + vcs_per_qos; k ++) + { + if(s->pending_msgs[output_port][k] != NULL) + return k; + } + } + } } - - int vcg = msg->saved_vc; - s->terminal_available_time = msg->saved_available_time; - - for(int i = 0; i < msg->num_cll; i++) + + /* All vcgs are exceeding their bandwidth limits*/ + msg->last_saved_qos = s->last_qos_lvl[output_port]; + int next_rr_vcg = (s->last_qos_lvl[output_port] + 1) % num_qos_levels; + + for(int i = 0; i < num_qos_levels; i++) { - codes_local_latency_reverse(lp); + base_limit = next_rr_vcg * vcs_per_qos; + for(int k = base_limit; k < base_limit + vcs_per_qos; k++) + { + if(s->pending_msgs[output_port][k] != NULL) + { + if(msg->last_saved_qos < 0) + msg->last_saved_qos = s->last_qos_lvl[output_port]; + + s->last_qos_lvl[output_port] = next_rr_vcg; + return k; + } + } + next_rr_vcg = (next_rr_vcg + 1) % num_qos_levels; + assert(next_rr_vcg < 2); } + return -1; +} - for(int i = 0; i < msg->num_rngs; i++) +void terminal_dally_commit(terminal_state * s, + 
tw_bf * bf, + terminal_dally_message * msg, + tw_lp * lp) +{ + if(msg->type == T_BANDWIDTH) { - tw_rand_reverse_unif(lp->rng); + if(msg->rc_is_qos_set == 1) { + free(msg->rc_qos_data); + free(msg->rc_qos_status); + msg->rc_is_qos_set = 0; + } } - s->terminal_length[vcg] += s->params->chunk_size; - /*TODO: MM change this to the vcg */ - s->vc_occupancy[vcg] -= s->params->chunk_size; - - terminal_dally_message_list* cur_entry = (terminal_dally_message_list *)rc_stack_pop(s->st); - - int data_size = s->params->chunk_size; - if(cur_entry->msg.packet_size < s->params->chunk_size) - data_size = cur_entry->msg.packet_size % s->params->chunk_size; - - s->qos_data[vcg] -= data_size; +} - prepend_to_terminal_dally_message_list(s->terminal_msgs, - s->terminal_msgs_tail, vcg, cur_entry); - if(bf->c4) { - s->in_send_loop = 1; - } - if(bf->c5) +void router_dally_commit(router_state * s, + tw_bf * bf, + terminal_dally_message * msg, + tw_lp * lp) +{ + if(msg->type == R_BANDWIDTH) { - s->issueIdle = 1; - if(bf->c6) - { - s->busy_time = msg->saved_total_time; - s->last_buf_full = msg->saved_busy_time; - s->busy_time_sample = msg->saved_sample_time; - s->ross_sample.busy_time_sample = msg->saved_sample_time; - s->busy_time_ross_sample = msg->saved_busy_time_ross; + if(msg->rc_is_qos_set == 1) { + free(msg->rc_qos_data); + free(msg->rc_qos_status); + msg->rc_is_qos_set = 0; } } - return; } -/* sends the packet from the current dragonfly compute node to the attached router */ -static void packet_send(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) + +/* initialize a dragonfly compute node terminal */ +void terminal_dally_init( terminal_state * s, tw_lp * lp ) { - - tw_stime ts; - tw_event *e; - terminal_dally_message *m; - tw_lpid router_id; - int vcg = 0; - int num_qos_levels = s->params->num_qos_levels; - - msg->last_saved_qos = -1; - msg->qos_reset1 = -1; - msg->qos_reset2 = -1; - msg->num_rngs = 0; - msg->num_cll = 0; + s->packet_gen = 0; + s->packet_fin 
= 0; + s->total_gen_size = 0; + s->is_monitoring_bw = 0; - vcg = get_next_vcg(s, bf, msg, lp); - - /* For a terminal to router connection, there would be as many VCGs as number - * of VCs*/ + int i; + char anno[MAX_NAME_LENGTH]; - if(vcg == -1) { - bf->c1 = 1; - s->in_send_loop = 0; - if(!s->last_buf_full) - { - bf->c3 = 1; - msg->saved_busy_time = s->last_buf_full; - s->last_buf_full = tw_now(lp); - } - return; + // Assign the global router ID + // TODO: be annotation-aware + codes_mapping_get_lp_info(lp->gid, lp_group_name, &mapping_grp_id, NULL, + &mapping_type_id, anno, &mapping_rep_id, &mapping_offset); + if (anno[0] == '\0'){ + s->anno = NULL; + s->params = &all_params[num_params-1]; } - - msg->saved_vc = vcg; - terminal_dally_message_list* cur_entry = s->terminal_msgs[vcg]; - int data_size = s->params->chunk_size; - uint64_t num_chunks = cur_entry->msg.packet_size/s->params->chunk_size; - if(cur_entry->msg.packet_size < s->params->chunk_size) - num_chunks++; - - tw_stime delay = s->params->cn_delay; - if((cur_entry->msg.packet_size < s->params->chunk_size) && (cur_entry->msg.chunk_id == num_chunks - 1)) - { - data_size = cur_entry->msg.packet_size % s->params->chunk_size; - delay = bytes_to_ns(cur_entry->msg.packet_size % s->params->chunk_size, s->params->cn_bandwidth); + else{ + s->anno = strdup(anno); + int id = configuration_get_annotation_index(anno, anno_map); + s->params = &all_params[id]; } - s->qos_data[vcg] += data_size; - - msg->saved_available_time = s->terminal_available_time; - - msg->num_rngs++; - ts = g_tw_lookahead + delay + tw_rand_unif(lp->rng); - - s->terminal_available_time = maxd(s->terminal_available_time, tw_now(lp)); - s->terminal_available_time += ts; + int num_qos_levels = s->params->num_qos_levels; + int num_lps = codes_mapping_get_lp_count(lp_group_name, 1, LP_CONFIG_NM_TERM, + s->anno, 0); - ts = s->terminal_available_time - tw_now(lp); - codes_mapping_get_lp_info(lp->gid, lp_group_name, &mapping_grp_id, NULL, - &mapping_type_id, 
NULL, &mapping_rep_id, &mapping_offset); - codes_mapping_get_lp_id(lp_group_name, LP_CONFIG_NM_ROUT, NULL, 0, - s->router_id / num_routers_per_mgrp, s->router_id % num_routers_per_mgrp, &router_id); + s->terminal_id = codes_mapping_get_lp_relative_id(lp->gid, 0, 0); + s->router_id=(int)s->terminal_id / (s->params->num_cn); + s->terminal_available_time = 0.0; + s->packet_counter = 0; + s->min_latency = INT_MAX; + s->max_latency = 0; - // if(s->router_id == 1) - // printf("\n Local router id %d global router id %d ", s->router_id, router_id); - // we are sending an event to the router, so no method_event here - void * remote_event; - e = model_net_method_event_new(router_id, ts, lp, - DRAGONFLY_DALLY_ROUTER, (void**)&m, &remote_event); - memcpy(m, &cur_entry->msg, sizeof(terminal_dally_message)); - if (m->remote_event_size_bytes){ - memcpy(remote_event, cur_entry->event_data, m->remote_event_size_bytes); - } + s->finished_msgs = 0; + s->finished_chunks = 0; + s->finished_packets = 0; + s->total_time = 0.0; + s->total_msg_size = 0; - m->type = R_ARRIVE; - m->src_terminal_id = lp->gid; - m->vc_index = vcg; - m->last_hop = TERMINAL; - m->magic = router_magic_num; - m->path_type = -1; - m->local_event_size_bytes = 0; - m->is_intm_visited = 0; - m->intm_grp_id = -1; - m->intm_rtr_id = -1; //for legacy prog-adaptive - tw_event_send(e); + s->stalled_chunks = 0; + s->busy_time = 0.0; + s->fwd_events = 0; + s->rev_events = 0; - if(cur_entry->msg.packet_ID == LLU(TRACK_PKT) && lp->gid == T_ID) - printf("\n Packet %llu generated at terminal %d dest %llu size %llu num chunks %llu router-id %d %llu", - cur_entry->msg.packet_ID, s->terminal_id, LLU(cur_entry->msg.dest_terminal_lpid), - LLU(cur_entry->msg.packet_size), LLU(num_chunks), s->router_id, router_id); + rc_stack_create(&s->st); + s->vc_occupancy = (int*)calloc(num_qos_levels, sizeof(int)); //1 vc times the number of qos levels + s->last_buf_full = 0.0; - if(cur_entry->msg.chunk_id == num_chunks - 1 && 
(cur_entry->msg.local_event_size_bytes > 0)) + s->terminal_length = (int*)calloc(num_qos_levels, sizeof(int)); //1 vc times number of qos levels + + /* Whether the virtual channel group is active or over-bw*/ + s->qos_status = (int*)calloc(num_qos_levels, sizeof(int)); + + /* How much data has been transmitted on the virtual channel group within + * the window */ + s->qos_data = (int*)calloc(num_qos_levels, sizeof(int)); + + for(i = 0; i < num_qos_levels; i++) { - msg->num_cll++; - tw_stime local_ts = codes_local_latency(lp); - tw_event *e_new = tw_event_new(cur_entry->msg.sender_lp, local_ts, lp); - void * m_new = tw_event_data(e_new); - void *local_event = (char*)cur_entry->event_data + - cur_entry->msg.remote_event_size_bytes; - memcpy(m_new, local_event, cur_entry->msg.local_event_size_bytes); - tw_event_send(e_new); + s->qos_data[i] = 0; + s->qos_status[i] = Q_ACTIVE; + s->vc_occupancy[i]=0; } - int next_vcg = 0; - - if(num_qos_levels > 1) - next_vcg = get_next_vcg(s, bf, msg, lp); - - s->vc_occupancy[vcg] += s->params->chunk_size; - cur_entry = return_head(s->terminal_msgs, s->terminal_msgs_tail, vcg); - rc_stack_push(lp, cur_entry, delete_terminal_dally_message_list, s->st); - s->terminal_length[vcg] -= s->params->chunk_size; - cur_entry = NULL; - if(next_vcg >= 0) - cur_entry = s->terminal_msgs[next_vcg]; + s->last_qos_lvl = 0; + s->rank_tbl = NULL; + s->terminal_msgs = + (terminal_dally_message_list**)calloc(num_qos_levels, sizeof(terminal_dally_message_list*)); + s->terminal_msgs_tail = + (terminal_dally_message_list**)calloc(num_qos_levels, sizeof(terminal_dally_message_list*)); - /* if there is another packet inline then schedule another send event */ - if(cur_entry != NULL && - s->vc_occupancy[next_vcg] + s->params->chunk_size <= s->params->cn_vc_size) { - terminal_dally_message *m_new; - msg->num_rngs++; - ts += tw_rand_unif(lp->rng); - e = model_net_method_event_new(lp->gid, ts, lp, DRAGONFLY_DALLY, - (void**)&m_new, NULL); - m_new->type = T_SEND; - 
m_new->magic = terminal_magic_num; - tw_event_send(e); - } else { - /* If not then the LP will wait for another credit or packet generation */ - bf->c4 = 1; - s->in_send_loop = 0; + for(int i = 0; i < num_qos_levels; i++) + { + s->terminal_msgs[i] = NULL; + s->terminal_msgs_tail[i] = NULL; } - if(s->issueIdle) { - bf->c5 = 1; - s->issueIdle = 0; - msg->num_rngs++; - ts += tw_rand_unif(lp->rng); - model_net_method_idle_event(ts, 0, lp); - - if(s->last_buf_full > 0.0) - { - bf->c6 = 1; - msg->saved_total_time = s->busy_time; - msg->saved_busy_time = s->last_buf_full; - msg->saved_sample_time = s->busy_time_sample; + s->in_send_loop = 0; + s->issueIdle = 0; - s->busy_time += (tw_now(lp) - s->last_buf_full); - s->busy_time_sample += (tw_now(lp) - s->last_buf_full); - s->ross_sample.busy_time_sample += (tw_now(lp) - s->last_buf_full); - msg->saved_busy_time_ross = s->busy_time_ross_sample; - s->busy_time_ross_sample += (tw_now(lp) - s->last_buf_full); - s->last_buf_full = 0.0; - } - } + /*if(s->terminal_id == 0) + { + char term_bw_log[64]; + sprintf(term_bw_log, "terminal-bw-tracker"); + dragonfly_term_bw_log = fopen(term_bw_log, "w"); + fprintf(dragonfly_term_bw_log, "\n term-id time-stamp port-id busy-time"); + }*/ return; } -static void packet_arrive_rc(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) +/* sets up the router virtual channels, global channels, + * local channels, compute node channels */ +void router_dally_init(router_state * r, tw_lp * lp) { - - for(int i = 0; i < msg->num_rngs; i++) - tw_rand_reverse_unif(lp->rng); - - for(int i = 0; i < msg->num_cll; i++) - codes_local_latency_reverse(lp); - if(bf->c31) - { - s->packet_fin--; - packet_fin--; - } - - if(msg->path_type == MINIMAL) - minimal_count--; - if(msg->path_type == NON_MINIMAL) - nonmin_count--; - - N_finished_chunks--; - s->finished_chunks--; - s->fin_chunks_sample--; - s->ross_sample.fin_chunks_sample--; - s->fin_chunks_ross_sample--; - - total_hops -= msg->my_N_hop; 
- s->total_hops -= msg->my_N_hop; - s->fin_hops_sample -= msg->my_N_hop; - s->ross_sample.fin_hops_sample -= msg->my_N_hop; - s->fin_hops_ross_sample -= msg->my_N_hop; - s->fin_chunks_time = msg->saved_sample_time; - s->ross_sample.fin_chunks_time = msg->saved_sample_time; - s->fin_chunks_time_ross_sample = msg->saved_fin_chunks_ross; - s->total_time = msg->saved_avg_time; - - struct qhash_head * hash_link = NULL; - struct dfly_qhash_entry * tmp = NULL; - - struct dfly_hash_key key; - key.message_id = msg->message_id; - key.sender_id = msg->sender_lp; - - hash_link = qhash_search(s->rank_tbl, &key); - tmp = qhash_entry(hash_link, struct dfly_qhash_entry, hash_link); - - mn_stats* stat; - stat = model_net_find_stats(msg->category, s->dragonfly_stats_array); - stat->recv_time = msg->saved_rcv_time; + char anno[MAX_NAME_LENGTH]; + codes_mapping_get_lp_info(lp->gid, lp_group_name, &mapping_grp_id, NULL, + &mapping_type_id, anno, &mapping_rep_id, &mapping_offset); - if(bf->c1) - { - stat->recv_count--; - stat->recv_bytes -= msg->packet_size; - N_finished_packets--; - s->finished_packets--; - } - if(bf->c3) - { - dragonfly_max_latency = msg->saved_available_time; + if (anno[0] == '\0'){ + r->anno = NULL; + r->params = &all_params[num_params-1]; + } else{ + r->anno = strdup(anno); + int id = configuration_get_annotation_index(anno, anno_map); + r->params = &all_params[id]; } - - if(bf->c22) - { - s->max_latency = msg->saved_available_time; - } - if(bf->c7) - { - //assert(!hash_link); - if(bf->c4) - model_net_event_rc2(lp, &msg->event_rc); - - N_finished_msgs--; - s->finished_msgs--; - total_msg_sz -= msg->total_size; - s->total_msg_size -= msg->total_size; - s->data_size_sample -= msg->total_size; - s->ross_sample.data_size_sample -= msg->total_size; - s->data_size_ross_sample -= msg->total_size; - - struct dfly_qhash_entry * d_entry_pop = (dfly_qhash_entry *)rc_stack_pop(s->st); - qhash_add(s->rank_tbl, &key, &(d_entry_pop->hash_link)); - s->rank_tbl_pop++; - - 
if(s->rank_tbl_pop >= DFLY_HASH_TABLE_SIZE) - tw_error(TW_LOC, "\n Exceeded allocated qhash size, increase hash size in dragonfly model"); - - hash_link = &(d_entry_pop->hash_link); - tmp = d_entry_pop; - } - - assert(tmp); - tmp->num_chunks--; + // shorthand + const dragonfly_param *p = r->params; - if(bf->c5) - { - qhash_del(hash_link); - free_tmp(tmp); - s->rank_tbl_pop--; - } - - return; -} + num_routers_per_mgrp = codes_mapping_get_lp_count (lp_group_name, 1, "modelnet_dragonfly_dally_router", + NULL, 0); + int num_grp_reps = codes_mapping_get_group_reps(lp_group_name); + if(p->total_routers != num_grp_reps * num_routers_per_mgrp) + tw_error(TW_LOC, "\n Config error: num_routers specified %d total routers computed in the network %d " + "does not match with repetitions * dragonfly_router %d ", + p->num_routers, p->total_routers, num_grp_reps * num_routers_per_mgrp); -static void send_remote_event(terminal_state * s, terminal_dally_message * msg, tw_lp * lp, tw_bf * bf, char * event_data, int remote_event_size) -{ - void * tmp_ptr = model_net_method_get_edata(DRAGONFLY_DALLY, msg); + r->router_id = codes_mapping_get_lp_relative_id(lp->gid, 0, 0); + r->group_id=r->router_id/p->num_routers; - msg->num_rngs++; - tw_stime ts = g_tw_lookahead + mpi_soft_overhead + tw_rand_unif(lp->rng); + char rtr_bw_log[128]; + sprintf(rtr_bw_log, "router-bw-tracker-%lu", g_tw_mynode); - if (msg->is_pull){ - bf->c4 = 1; - struct codes_mctx mc_dst = - codes_mctx_set_global_direct(msg->sender_mn_lp); - struct codes_mctx mc_src = - codes_mctx_set_global_direct(lp->gid); - int net_id = model_net_get_id(LP_METHOD_NM_TERM); + if(dragonfly_rtr_bw_log == NULL) + { + dragonfly_rtr_bw_log = fopen(rtr_bw_log, "w+"); - model_net_set_msg_param(MN_MSG_PARAM_START_TIME, MN_MSG_PARAM_START_TIME_VAL, &(msg->msg_start_time)); - - msg->event_rc = model_net_event_mctx(net_id, &mc_src, &mc_dst, msg->category, - msg->sender_lp, msg->pull_size, ts, - remote_event_size, tmp_ptr, 0, NULL, lp); - } - else{ - 
tw_event * e = tw_event_new(msg->final_dest_gid, ts, lp); - void * m_remote = tw_event_data(e); - memcpy(m_remote, event_data, remote_event_size); - tw_event_send(e); + fprintf(dragonfly_rtr_bw_log, "\n router-id time-stamp port-id qos-level bw-consumed qos-status qos-data busy-time"); } - return; -} + //printf("\n Local router id %d global id %d ", r->router_id, lp->gid); -/* packet arrives at the destination terminal */ -static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) -{ + r->is_monitoring_bw = 0; + r->fwd_events = 0; + r->rev_events = 0; + r->ross_rsample.fwd_events = 0; + r->ross_rsample.rev_events = 0; - if (msg->my_N_hop > s->params->max_hops_notify) - { - printf("Terminal received a packet with %d hops! (Notify on > than %d)\n",msg->my_N_hop, s->params->max_hops_notify); - } - // NIC aggregation - should this be a separate function? - // Trigger an event on receiving server + int num_qos_levels = p->num_qos_levels; - msg->num_rngs = 0; - msg->num_cll = 0; + r->connMan = &connManagerList[r->router_id]; - if(!s->rank_tbl) - s->rank_tbl = qhash_init(dragonfly_rank_hash_compare, dragonfly_hash_func, DFLY_HASH_TABLE_SIZE); - - struct dfly_hash_key key; - key.message_id = msg->message_id; - key.sender_id = msg->sender_lp; - - struct qhash_head *hash_link = NULL; - struct dfly_qhash_entry * tmp = NULL; - - hash_link = qhash_search(s->rank_tbl, &key); - - if(hash_link) - tmp = qhash_entry(hash_link, struct dfly_qhash_entry, hash_link); + r->global_channel = (int*)calloc(p->num_global_channels, sizeof(int)); + r->next_output_available_time = (tw_stime*)calloc(p->radix, sizeof(tw_stime)); + r->link_traffic = (int64_t*)calloc(p->radix, sizeof(int64_t)); + r->link_traffic_sample = (int64_t*)calloc(p->radix, sizeof(int64_t)); - uint64_t total_chunks = msg->total_size / s->params->chunk_size; + r->stalled_chunks = (unsigned long*)calloc(p->radix, sizeof(unsigned long)); - if(msg->total_size % s->params->chunk_size) - 
total_chunks++; + r->vc_occupancy = (int**)calloc(p->radix , sizeof(int*)); + r->in_send_loop = (int*)calloc(p->radix, sizeof(int)); + r->qos_data = (int**)calloc(p->radix, sizeof(int*)); + r->last_qos_lvl = (int*)calloc(p->radix, sizeof(int)); + r->qos_status = (int**)calloc(p->radix, sizeof(int*)); + r->pending_msgs = + (terminal_dally_message_list***)calloc((p->radix), sizeof(terminal_dally_message_list**)); + r->pending_msgs_tail = + (terminal_dally_message_list***)calloc((p->radix), sizeof(terminal_dally_message_list**)); + r->queued_msgs = + (terminal_dally_message_list***)calloc(p->radix, sizeof(terminal_dally_message_list**)); + r->queued_msgs_tail = + (terminal_dally_message_list***)calloc(p->radix, sizeof(terminal_dally_message_list**)); + r->queued_count = (int*)calloc(p->radix, sizeof(int)); + r->last_buf_full = (tw_stime*)calloc(p->radix, sizeof(tw_stime*)); + r->busy_time = (tw_stime*)calloc(p->radix, sizeof(tw_stime)); + r->busy_time_sample = (tw_stime*)calloc(p->radix, sizeof(tw_stime)); - if(!total_chunks) - total_chunks = 1; + /* set up for ROSS stats sampling */ + r->link_traffic_ross_sample = (int64_t*)calloc(p->radix, sizeof(int64_t)); + r->busy_time_ross_sample = (tw_stime*)calloc(p->radix, sizeof(tw_stime)); + if (g_st_model_stats) + lp->model_types->mstat_sz = sizeof(tw_lpid) + (sizeof(int64_t) + sizeof(tw_stime)) * p->radix; + if (g_st_use_analysis_lps && g_st_model_stats) + lp->model_types->sample_struct_sz = sizeof(struct dfly_router_sample) + (sizeof(tw_stime) + sizeof(int64_t)) * p->radix; + r->ross_rsample.busy_time = (tw_stime*)calloc(p->radix, sizeof(tw_stime)); + r->ross_rsample.link_traffic_sample = (int64_t*)calloc(p->radix, sizeof(int64_t)); - /*if(tmp) + rc_stack_create(&r->st); + + for(int i=0; i < p->radix; i++) { - if(tmp->num_chunks >= total_chunks || tmp->num_chunks < 0) + // Set credit & router occupancy + r->last_buf_full[i] = 0.0; + r->busy_time[i] = 0.0; + r->busy_time_sample[i] = 0.0; + 
r->next_output_available_time[i]=0; + r->last_qos_lvl[i] = 0; + r->link_traffic[i]=0; + r->link_traffic_sample[i] = 0; + r->queued_count[i] = 0; + r->in_send_loop[i] = 0; + r->vc_occupancy[i] = (int*)calloc(p->num_vcs, sizeof(int)); + // printf("\n Number of vcs %d for radix %d ", p->num_vcs, p->radix); + r->pending_msgs[i] = (terminal_dally_message_list**)calloc(p->num_vcs, + sizeof(terminal_dally_message_list*)); + r->pending_msgs_tail[i] = (terminal_dally_message_list**)calloc(p->num_vcs, + sizeof(terminal_dally_message_list*)); + r->queued_msgs[i] = (terminal_dally_message_list**)calloc(p->num_vcs, + sizeof(terminal_dally_message_list*)); + r->queued_msgs_tail[i] = (terminal_dally_message_list**)calloc(p->num_vcs, + sizeof(terminal_dally_message_list*)); + r->qos_status[i] = (int*)calloc(num_qos_levels, sizeof(int)); + r->qos_data[i] = (int*)calloc(num_qos_levels, sizeof(int)); + for(int j = 0; j < num_qos_levels; j++) { - //tw_output(lp, "\n invalid number of chunks %d for LP %ld ", tmp->num_chunks, lp->gid); - tw_lp_suspend(lp, 0, 0); - return; + r->qos_status[i][j] = Q_ACTIVE; + r->qos_data[i][j] = 0; } - }*/ - assert(lp->gid == msg->dest_terminal_lpid); + for(int j = 0; j < p->num_vcs; j++) + { + r->pending_msgs[i][j] = NULL; + r->pending_msgs_tail[i][j] = NULL; + r->queued_msgs[i][j] = NULL; + r->queued_msgs_tail[i][j] = NULL; + } + } - if(msg->packet_ID == LLU(TRACK_PKT) && msg->src_terminal_id == T_ID) - printf("\n Packet %llu arrived at lp %llu hops %d ", msg->sender_lp, LLU(lp->gid), msg->my_N_hop); - - msg->num_rngs++; - tw_stime ts = g_tw_lookahead + s->params->credit_delay + tw_rand_unif(lp->rng); + r->connMan->solidify_connections(); - // no method_event here - message going to router - tw_event * buf_e; - terminal_dally_message * buf_msg; - buf_e = model_net_method_event_new(msg->intm_lp_id, ts, lp, - DRAGONFLY_DALLY_ROUTER, (void**)&buf_msg, NULL); - buf_msg->magic = router_magic_num; - buf_msg->vc_index = msg->vc_index; - buf_msg->output_chan = 
msg->output_chan; - buf_msg->type = R_BUFFER; - tw_event_send(buf_e); - - bf->c1 = 0; - bf->c3 = 0; - bf->c4 = 0; - bf->c7 = 0; - - /* Total overall finished chunks in simulation */ - N_finished_chunks++; - /* Finished chunks on a LP basis */ - s->finished_chunks++; - /* Finished chunks per sample */ - s->fin_chunks_sample++; - s->ross_sample.fin_chunks_sample++; - s->fin_chunks_ross_sample++; - - /* WE do not allow self messages through dragonfly */ - assert(lp->gid != msg->src_terminal_id); + return; +} - uint64_t num_chunks = msg->packet_size / s->params->chunk_size; - if (msg->packet_size < s->params->chunk_size) - num_chunks++; +/* dragonfly packet event reverse handler */ +static void dragonfly_dally_packet_event_rc(tw_lp *sender) +{ + codes_local_latency_reverse(sender); + return; +} - if(msg->path_type == MINIMAL) - minimal_count++; +/* dragonfly packet event , generates a dragonfly packet on the compute node */ +static tw_stime dragonfly_dally_packet_event( + model_net_request const * req, + uint64_t message_offset, + uint64_t packet_size, + tw_stime offset, + mn_sched_params const * sched_params, + void const * remote_event, + void const * self_event, + tw_lp *sender, + int is_last_pckt) +{ + (void)message_offset; + (void)sched_params; + tw_event * e_new; + tw_stime xfer_to_nic_time; + terminal_dally_message * msg; + char* tmp_ptr; - if(msg->path_type == NON_MINIMAL) - nonmin_count++; + xfer_to_nic_time = codes_local_latency(sender); + //e_new = tw_event_new(sender->gid, xfer_to_nic_time+offset, sender); + //msg = tw_event_data(e_new); + e_new = model_net_method_event_new(sender->gid, xfer_to_nic_time+offset, + sender, DRAGONFLY_DALLY, (void**)&msg, (void**)&tmp_ptr); + strcpy(msg->category, req->category); + msg->final_dest_gid = req->final_dest_lp; + msg->total_size = req->msg_size; + msg->sender_lp=req->src_lp; + msg->sender_mn_lp = sender->gid; + msg->packet_size = packet_size; + msg->travel_start_time = tw_now(sender); + msg->remote_event_size_bytes 
= 0; + msg->local_event_size_bytes = 0; + msg->type = T_GENERATE; + msg->dest_terminal_lpid = req->dest_mn_lp; + msg->dfdally_dest_terminal_id = codes_mapping_get_lp_relative_id(msg->dest_terminal_lpid,0,0); + msg->message_id = req->msg_id; + msg->is_pull = req->is_pull; + msg->pull_size = req->pull_size; + msg->magic = terminal_magic_num; + msg->msg_start_time = req->msg_start_time; - if(msg->chunk_id == num_chunks - 1) + if(is_last_pckt) /* Its the last packet so pass in remote and local event information*/ { - bf->c31 = 1; - s->packet_fin++; - packet_fin++; - } - if(msg->path_type != MINIMAL && msg->path_type != NON_MINIMAL) - printf("\n Wrong message path type %d ", msg->path_type); + if(req->remote_event_size > 0) + { + msg->remote_event_size_bytes = req->remote_event_size; + memcpy(tmp_ptr, remote_event, req->remote_event_size); + tmp_ptr += req->remote_event_size; + } + if(req->self_event_size > 0) + { + msg->local_event_size_bytes = req->self_event_size; + memcpy(tmp_ptr, self_event, req->self_event_size); + tmp_ptr += req->self_event_size; + } + } + //printf("\n dragonfly remote event %d local event %d last packet %d %lf ", msg->remote_event_size_bytes, msg->local_event_size_bytes, is_last_pckt, xfer_to_nic_time); + tw_event_send(e_new); + return xfer_to_nic_time; +} - /* save the sample time */ - msg->saved_sample_time = s->fin_chunks_time; - s->fin_chunks_time += (tw_now(lp) - msg->travel_start_time); - s->ross_sample.fin_chunks_time += (tw_now(lp) - msg->travel_start_time); - msg->saved_fin_chunks_ross = s->fin_chunks_time_ross_sample; - s->fin_chunks_time_ross_sample += (tw_now(lp) - msg->travel_start_time); +static void packet_generate_rc(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) +{ + int num_qos_levels = s->params->num_qos_levels; + if(bf->c1) + s->is_monitoring_bw = 0; - /* save the total time per LP */ - msg->saved_avg_time = s->total_time; - s->total_time += (tw_now(lp) - msg->travel_start_time); - total_hops += 
msg->my_N_hop; - s->total_hops += msg->my_N_hop; - s->fin_hops_sample += msg->my_N_hop; - s->ross_sample.fin_hops_sample += msg->my_N_hop; - s->fin_hops_ross_sample += msg->my_N_hop; - - mn_stats* stat = model_net_find_stats(msg->category, s->dragonfly_stats_array); - msg->saved_rcv_time = stat->recv_time; - stat->recv_time += (tw_now(lp) - msg->travel_start_time); - -#if DEBUG == 1 - if( msg->packet_ID == TRACK - && msg->chunk_id == num_chunks-1 - && msg->message_id == TRACK_MSG) - { - printf( "(%lf) [Terminal %d] packet %lld has arrived \n", - tw_now(lp), (int)lp->gid, msg->packet_ID); + s->total_gen_size -= msg->packet_size; + s->packet_gen--; + packet_gen--; + s->packet_counter--; - printf("travel start time is %f\n", - msg->travel_start_time); + if(bf->c2) + num_local_packets_sr--; + if(bf->c3) + num_local_packets_sg--; + if(bf->c4) + num_remote_packets--; - printf("My hop now is %d\n",msg->my_N_hop); - } -#endif + for(int i = 0; i < msg->num_rngs; i++) + tw_rand_reverse_unif(lp->rng); - /* Now retreieve the number of chunks completed from the hash and update - * them */ - void *m_data_src = model_net_method_get_edata(DRAGONFLY_DALLY, msg); + for(int i = 0; i < msg->num_cll; i++) + codes_local_latency_reverse(lp); - /* If an entry does not exist then create one */ - if(!tmp) - { - bf->c5 = 1; - struct dfly_qhash_entry * d_entry = (dfly_qhash_entry *)calloc(1, sizeof (struct dfly_qhash_entry)); - d_entry->num_chunks = 0; - d_entry->key = key; - d_entry->remote_event_data = NULL; - d_entry->remote_event_size = 0; - qhash_add(s->rank_tbl, &key, &(d_entry->hash_link)); - s->rank_tbl_pop++; - - if(s->rank_tbl_pop >= DFLY_HASH_TABLE_SIZE) - tw_error(TW_LOC, "\n Exceeded allocated qhash size, increase hash size in dragonfly model"); - - hash_link = &(d_entry->hash_link); - tmp = d_entry; - } - - assert(tmp); - tmp->num_chunks++; + int num_chunks = msg->packet_size/s->params->chunk_size; + if(msg->packet_size < s->params->chunk_size) + num_chunks++; - if(msg->chunk_id 
== num_chunks - 1) + int i; + int vcg = 0; + if(num_qos_levels > 1) { - bf->c1 = 1; - stat->recv_count++; - stat->recv_bytes += msg->packet_size; - - N_finished_packets++; - s->finished_packets++; + vcg = get_vcg_from_category(msg); + assert(vcg == Q_HIGH || vcg == Q_MEDIUM); } + assert(vcg < num_qos_levels); - /* if its the last chunk of the packet then handle the remote event data */ - if(msg->remote_event_size_bytes > 0 && !tmp->remote_event_data) - { - /* Retreive the remote event entry */ - tmp->remote_event_data = (char*)calloc(1, msg->remote_event_size_bytes); - assert(tmp->remote_event_data); - tmp->remote_event_size = msg->remote_event_size_bytes; - memcpy(tmp->remote_event_data, m_data_src, msg->remote_event_size_bytes); + for(i = 0; i < num_chunks; i++) { + delete_terminal_dally_message_list(return_tail(s->terminal_msgs, s->terminal_msgs_tail, vcg)); + s->terminal_length[vcg] -= s->params->chunk_size; } - - if(s->min_latency > tw_now(lp) - msg->travel_start_time) { - s->min_latency = tw_now(lp) - msg->travel_start_time; - } - - if (dragonfly_max_latency < tw_now( lp ) - msg->travel_start_time) { - bf->c3 = 1; - msg->saved_available_time = dragonfly_max_latency; - dragonfly_max_latency = tw_now( lp ) - msg->travel_start_time; - s->max_latency = tw_now(lp) - msg->travel_start_time; + if(bf->c5) { + s->in_send_loop = 0; } - if(s->max_latency < tw_now( lp ) - msg->travel_start_time) { - bf->c22 = 1; - msg->saved_available_time = s->max_latency; - s->max_latency = tw_now(lp) - msg->travel_start_time; - } - /* If all chunks of a message have arrived then send a remote event to the - * callee*/ - //assert(tmp->num_chunks <= total_chunks); - - if(tmp->num_chunks >= total_chunks) - { - bf->c7 = 1; - s->data_size_sample += msg->total_size; - s->ross_sample.data_size_sample += msg->total_size; - s->data_size_ross_sample += msg->total_size; - N_finished_msgs++; - total_msg_sz += msg->total_size; - s->total_msg_size += msg->total_size; - s->finished_msgs++; - - 
//assert(tmp->remote_event_data && tmp->remote_event_size > 0); - if(tmp->remote_event_data && tmp->remote_event_size > 0) { - send_remote_event(s, msg, lp, bf, tmp->remote_event_data, tmp->remote_event_size); + if (bf->c11) { + s->issueIdle = 0; + s->stalled_chunks--; + if(bf->c8) { + s->last_buf_full = msg->saved_busy_time; } - /* Remove the hash entry */ - qhash_del(hash_link); - rc_stack_push(lp, tmp, free_tmp, s->st); - s->rank_tbl_pop--; - } - return; + } + struct mn_stats* stat; + stat = model_net_find_stats(msg->category, s->dragonfly_stats_array); + stat->send_count--; + stat->send_bytes -= msg->packet_size; + stat->send_time -= (1/s->params->cn_bandwidth) * msg->packet_size; } -static void ross_dally_dragonfly_rsample_fn(router_state * s, tw_bf * bf, tw_lp * lp, struct dfly_router_sample *sample) -{ - (void)lp; - (void)bf; +/* generates packet at the current dragonfly compute node */ +static void packet_generate(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) { - const dragonfly_param * p = s->params; - int i = 0; + msg->num_rngs = 0; + msg->num_cll = 0; - sample->router_id = s->router_id; - sample->end_time = tw_now(lp); - sample->fwd_events = s->ross_rsample.fwd_events; - sample->rev_events = s->ross_rsample.rev_events; - sample->busy_time = (tw_stime*)((&sample->rev_events) + 1); - sample->link_traffic_sample = (int64_t*)((&sample->busy_time[0]) + p->radix); + packet_gen++; + int num_qos_levels = s->params->num_qos_levels; + int vcg = 0; - for(; i < p->radix; i++) + if(num_qos_levels > 1) { - sample->busy_time[i] = s->ross_rsample.busy_time[i]; - sample->link_traffic_sample[i] = s->ross_rsample.link_traffic_sample[i]; + tw_lpid router_id; + codes_mapping_get_lp_info(lp->gid, lp_group_name, &mapping_grp_id, NULL, + &mapping_type_id, NULL, &mapping_rep_id, &mapping_offset); + codes_mapping_get_lp_id(lp_group_name, LP_CONFIG_NM_ROUT, NULL, 0, + s->router_id / num_routers_per_mgrp, s->router_id % num_routers_per_mgrp, &router_id); 
+ if(s->is_monitoring_bw == 0) + { + bf->c1 = 1; + /* Issue an event on both terminal and router to monitor bandwidth */ + msg->num_cll++; + tw_stime bw_ts = bw_reset_window + codes_local_latency(lp); + terminal_dally_message * m; + tw_event * e = model_net_method_event_new(lp->gid, bw_ts, lp, DRAGONFLY_DALLY, + (void**)&m, NULL); + m->type = T_BANDWIDTH; + m->magic = terminal_magic_num; + s->is_monitoring_bw = 1; + tw_event_send(e); + } + vcg = get_vcg_from_category(msg); + assert(vcg == Q_HIGH || vcg == Q_MEDIUM); } + assert(vcg < num_qos_levels); - /* clear up the current router stats */ - s->ross_rsample.fwd_events = 0; - s->ross_rsample.rev_events = 0; + s->packet_gen++; + s->total_gen_size += msg->packet_size; - for( i = 0; i < p->radix; i++) - { - s->ross_rsample.busy_time[i] = 0; - s->ross_rsample.link_traffic_sample[i] = 0; - } -} + tw_stime ts, nic_ts; -static void ross_dally_dragonfly_rsample_rc_fn(router_state * s, tw_bf * bf, tw_lp * lp, struct dfly_router_sample *sample) -{ - (void)lp; - (void)bf; - - const dragonfly_param * p = s->params; - int i =0; + assert(lp->gid != msg->dest_terminal_lpid); + const dragonfly_param *p = s->params; - for(; i < p->radix; i++) + int total_event_size; + uint64_t num_chunks = msg->packet_size / p->chunk_size; + double cn_delay = s->params->cn_delay; + + if (msg->packet_size < s->params->chunk_size) + num_chunks++; + + if(msg->packet_size < s->params->chunk_size) + cn_delay = bytes_to_ns(msg->packet_size % s->params->chunk_size, s->params->cn_bandwidth); + + int dest_router_id = codes_mapping_get_lp_relative_id(msg->dest_terminal_lpid, 0, 0) / s->params->num_cn; + int dest_grp_id = dest_router_id / s->params->num_routers; + int src_grp_id = s->router_id / s->params->num_routers; + + if(src_grp_id == dest_grp_id) { - s->ross_rsample.busy_time[i] = sample->busy_time[i]; - s->ross_rsample.link_traffic_sample[i] = sample->link_traffic_sample[i]; + if(dest_router_id == s->router_id) + { + bf->c2 = 1; + 
num_local_packets_sr++; + } + else + { + bf->c3 = 1; + num_local_packets_sg++; + } } + else + { + bf->c4 = 1; + num_remote_packets++; + } + msg->num_rngs++; + nic_ts = g_tw_lookahead + (num_chunks * cn_delay) + tw_rand_unif(lp->rng); + + msg->packet_ID = s->packet_counter; + s->packet_counter++; + msg->my_N_hop = 0; + msg->my_l_hop = 0; + msg->my_g_hop = 0; - s->ross_rsample.fwd_events = sample->fwd_events; - s->ross_rsample.rev_events = sample->rev_events; -} -static void ross_dally_dragonfly_sample_fn(terminal_state * s, tw_bf * bf, tw_lp * lp, struct dfly_cn_sample *sample) -{ - (void)lp; - (void)bf; + for(int i = 0; i < num_chunks; i++) + { + terminal_dally_message_list *cur_chunk = (terminal_dally_message_list*)calloc(1, + sizeof(terminal_dally_message_list)); + msg->origin_router_id = s->router_id; + init_terminal_dally_message_list(cur_chunk, msg); - sample->terminal_id = s->terminal_id; - sample->fin_chunks_sample = s->ross_sample.fin_chunks_sample; - sample->data_size_sample = s->ross_sample.data_size_sample; - sample->fin_hops_sample = s->ross_sample.fin_hops_sample; - sample->fin_chunks_time = s->ross_sample.fin_chunks_time; - sample->busy_time_sample = s->ross_sample.busy_time_sample; - sample->end_time = tw_now(lp); - sample->fwd_events = s->ross_sample.fwd_events; - sample->rev_events = s->ross_sample.rev_events; - - s->ross_sample.fin_chunks_sample = 0; - s->ross_sample.data_size_sample = 0; - s->ross_sample.fin_hops_sample = 0; - s->ross_sample.fwd_events = 0; - s->ross_sample.rev_events = 0; - s->ross_sample.fin_chunks_time = 0; - s->ross_sample.busy_time_sample = 0; -} + if(msg->remote_event_size_bytes + msg->local_event_size_bytes > 0) { + cur_chunk->event_data = (char*)calloc(1, + msg->remote_event_size_bytes + msg->local_event_size_bytes); + } + + void * m_data_src = model_net_method_get_edata(DRAGONFLY_DALLY, msg); + if (msg->remote_event_size_bytes){ + memcpy(cur_chunk->event_data, m_data_src, msg->remote_event_size_bytes); + } + if 
(msg->local_event_size_bytes){ + m_data_src = (char*)m_data_src + msg->remote_event_size_bytes; + memcpy((char*)cur_chunk->event_data + msg->remote_event_size_bytes, + m_data_src, msg->local_event_size_bytes); + } -static void ross_dally_dragonfly_sample_rc_fn(terminal_state * s, tw_bf * bf, tw_lp * lp, struct dfly_cn_sample *sample) -{ - (void)lp; - (void)bf; + cur_chunk->msg.output_chan = vcg; + cur_chunk->msg.chunk_id = i; + cur_chunk->msg.origin_router_id = s->router_id; + append_to_terminal_dally_message_list(s->terminal_msgs, s->terminal_msgs_tail, + vcg, cur_chunk); + s->terminal_length[vcg] += s->params->chunk_size; + } - s->ross_sample.busy_time_sample = sample->busy_time_sample; - s->ross_sample.fin_chunks_time = sample->fin_chunks_time; - s->ross_sample.fin_hops_sample = sample->fin_hops_sample; - s->ross_sample.data_size_sample = sample->data_size_sample; - s->ross_sample.fin_chunks_sample = sample->fin_chunks_sample; - s->ross_sample.fwd_events = sample->fwd_events; - s->ross_sample.rev_events = sample->rev_events; -} + if(s->terminal_length[vcg] < s->params->cn_vc_size) { + model_net_method_idle_event(nic_ts, 0, lp); + } else { + bf->c11 = 1; + s->issueIdle = 1; + s->stalled_chunks++; -void dragonfly_dally_rsample_init(router_state * s, - tw_lp * lp) -{ - (void)lp; - int i = 0; - const dragonfly_param * p = s->params; + //this block was missing from when QOS was added - readded 5-21-19 + if(s->last_buf_full == 0.0) + { + bf->c8 = 1; + msg->saved_busy_time = s->last_buf_full; + /* TODO: Assumes a single vc from terminal to router */ + s->last_buf_full = tw_now(lp); + } + } + + if(s->in_send_loop == 0) { + bf->c5 = 1; + msg->num_cll++; + ts = codes_local_latency(lp); + terminal_dally_message *m; + tw_event* e = model_net_method_event_new(lp->gid, ts, lp, DRAGONFLY_DALLY, + (void**)&m, NULL); + m->type = T_SEND; + m->magic = terminal_magic_num; + s->in_send_loop = 1; + tw_event_send(e); + } - assert(p->radix); + total_event_size = 
model_net_get_msg_sz(DRAGONFLY_DALLY) + + msg->remote_event_size_bytes + msg->local_event_size_bytes; + mn_stats* stat; + stat = model_net_find_stats(msg->category, s->dragonfly_stats_array); + stat->send_count++; + stat->send_bytes += msg->packet_size; + stat->send_time += (1/p->cn_bandwidth) * msg->packet_size; + if(stat->max_event_size < total_event_size) + stat->max_event_size = total_event_size; - s->max_arr_size = MAX_STATS; - s->rsamples = (struct dfly_router_sample*)calloc(MAX_STATS, sizeof(struct dfly_router_sample)); - for(; i < s->max_arr_size; i++) - { - s->rsamples[i].busy_time = (tw_stime*)calloc(p->radix, sizeof(tw_stime)); - s->rsamples[i].link_traffic_sample = (int64_t*)calloc(p->radix, sizeof(int64_t)); - } + return; } -void dragonfly_dally_rsample_rc_fn(router_state * s, - tw_bf * bf, - terminal_dally_message * msg, - tw_lp * lp) +static void packet_send_rc(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) { - (void)bf; - (void)lp; - (void)msg; - - s->op_arr_size--; - int cur_indx = s->op_arr_size; - struct dfly_router_sample stat = s->rsamples[cur_indx]; + int num_qos_levels = s->params->num_qos_levels; - const dragonfly_param * p = s->params; - int i =0; + if(msg->qos_reset1) + s->qos_status[0] = Q_ACTIVE; + if(msg->qos_reset2) + s->qos_status[1] = Q_ACTIVE; + + if(msg->last_saved_qos) + s->last_qos_lvl = msg->last_saved_qos; - for(; i < p->radix; i++) + if(bf->c1) { + s->in_send_loop = 1; + if(bf->c3) + s->last_buf_full = msg->saved_busy_time; + + return; + } + + int vcg = msg->saved_vc; + s->terminal_available_time = msg->saved_available_time; + + for(int i = 0; i < msg->num_cll; i++) { - s->busy_time_sample[i] = stat.busy_time[i]; - s->link_traffic_sample[i] = stat.link_traffic_sample[i]; + codes_local_latency_reverse(lp); } - for( i = 0; i < p->radix; i++) + for(int i = 0; i < msg->num_rngs; i++) { - stat.busy_time[i] = 0; - stat.link_traffic_sample[i] = 0; + tw_rand_reverse_unif(lp->rng); } - s->fwd_events = 
stat.fwd_events; - s->rev_events = stat.rev_events; -} - -void dragonfly_dally_rsample_fn(router_state * s, - tw_bf * bf, - terminal_dally_message * msg, - tw_lp * lp) -{ - (void)bf; - (void)lp; - (void)msg; + s->terminal_length[vcg] += s->params->chunk_size; + /*TODO: MM change this to the vcg */ + s->vc_occupancy[vcg] -= s->params->chunk_size; - const dragonfly_param * p = s->params; + terminal_dally_message_list* cur_entry = (terminal_dally_message_list *)rc_stack_pop(s->st); + + int data_size = s->params->chunk_size; + if(cur_entry->msg.packet_size < s->params->chunk_size) + data_size = cur_entry->msg.packet_size % s->params->chunk_size; - if(s->op_arr_size >= s->max_arr_size) + s->qos_data[vcg] -= data_size; + + prepend_to_terminal_dally_message_list(s->terminal_msgs, + s->terminal_msgs_tail, vcg, cur_entry); + if(bf->c4) { + s->in_send_loop = 1; + } + if(bf->c5) { - struct dfly_router_sample * tmp = (dfly_router_sample *)calloc((MAX_STATS + s->max_arr_size), sizeof(struct dfly_router_sample)); - memcpy(tmp, s->rsamples, s->op_arr_size * sizeof(struct dfly_router_sample)); - free(s->rsamples); - s->rsamples = tmp; - s->max_arr_size += MAX_STATS; + s->issueIdle = 1; + if(bf->c6) + { + s->busy_time = msg->saved_total_time; + s->last_buf_full = msg->saved_busy_time; + s->busy_time_sample = msg->saved_sample_time; + s->ross_sample.busy_time_sample = msg->saved_sample_time; + s->busy_time_ross_sample = msg->saved_busy_time_ross; + } } + return; +} +/* sends the packet from the current dragonfly compute node to the attached router */ +static void packet_send(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) +{ + + tw_stime ts; + tw_event *e; + terminal_dally_message *m; + tw_lpid router_id; + int vcg = 0; + int num_qos_levels = s->params->num_qos_levels; + + msg->last_saved_qos = -1; + msg->qos_reset1 = -1; + msg->qos_reset2 = -1; + msg->num_rngs = 0; + msg->num_cll = 0; - int i = 0; - int cur_indx = s->op_arr_size; + vcg = get_next_vcg(s, 
bf, msg, lp); + + /* For a terminal to router connection, there would be as many VCGs as number + * of VCs*/ - s->rsamples[cur_indx].router_id = s->router_id; - s->rsamples[cur_indx].end_time = tw_now(lp); - s->rsamples[cur_indx].fwd_events = s->fwd_events; - s->rsamples[cur_indx].rev_events = s->rev_events; + if(vcg == -1) { + bf->c1 = 1; + s->in_send_loop = 0; + if(!s->last_buf_full) + { + bf->c3 = 1; + msg->saved_busy_time = s->last_buf_full; + s->last_buf_full = tw_now(lp); + } + return; + } - for(; i < p->radix; i++) + msg->saved_vc = vcg; + terminal_dally_message_list* cur_entry = s->terminal_msgs[vcg]; + int data_size = s->params->chunk_size; + uint64_t num_chunks = cur_entry->msg.packet_size/s->params->chunk_size; + if(cur_entry->msg.packet_size < s->params->chunk_size) + num_chunks++; + + tw_stime delay = s->params->cn_delay; + if((cur_entry->msg.packet_size < s->params->chunk_size) && (cur_entry->msg.chunk_id == num_chunks - 1)) { - s->rsamples[cur_indx].busy_time[i] = s->busy_time_sample[i]; - s->rsamples[cur_indx].link_traffic_sample[i] = s->link_traffic_sample[i]; + data_size = cur_entry->msg.packet_size % s->params->chunk_size; + delay = bytes_to_ns(cur_entry->msg.packet_size % s->params->chunk_size, s->params->cn_bandwidth); } - s->op_arr_size++; + s->qos_data[vcg] += data_size; + + msg->saved_available_time = s->terminal_available_time; + + msg->num_rngs++; + ts = g_tw_lookahead + delay + tw_rand_unif(lp->rng); + + s->terminal_available_time = maxd(s->terminal_available_time, tw_now(lp)); + s->terminal_available_time += ts; - /* clear up the current router stats */ - s->fwd_events = 0; - s->rev_events = 0; + ts = s->terminal_available_time - tw_now(lp); + codes_mapping_get_lp_info(lp->gid, lp_group_name, &mapping_grp_id, NULL, + &mapping_type_id, NULL, &mapping_rep_id, &mapping_offset); + codes_mapping_get_lp_id(lp_group_name, LP_CONFIG_NM_ROUT, NULL, 0, + s->router_id / num_routers_per_mgrp, s->router_id % num_routers_per_mgrp, &router_id); - for( 
i = 0; i < p->radix; i++) - { - s->busy_time_sample[i] = 0; - s->link_traffic_sample[i] = 0; + // if(s->router_id == 1) + // printf("\n Local router id %d global router id %d ", s->router_id, router_id); + // we are sending an event to the router, so no method_event here + void * remote_event; + e = model_net_method_event_new(router_id, ts, lp, + DRAGONFLY_DALLY_ROUTER, (void**)&m, &remote_event); + memcpy(m, &cur_entry->msg, sizeof(terminal_dally_message)); + if (m->remote_event_size_bytes){ + memcpy(remote_event, cur_entry->event_data, m->remote_event_size_bytes); } -} -//TODO redo this -void dragonfly_dally_rsample_fin(router_state * s, - tw_lp * lp) -{ - (void)lp; - const dragonfly_param * p = s->params; + m->type = R_ARRIVE; + m->src_terminal_id = lp->gid; + m->vc_index = vcg; + m->last_hop = TERMINAL; + m->magic = router_magic_num; + m->path_type = -1; + m->local_event_size_bytes = 0; + m->is_intm_visited = 0; + m->intm_grp_id = -1; + m->intm_rtr_id = -1; //for legacy prog-adaptive + tw_event_send(e); - if(s->router_id == 0) - { - /* write metadata file */ - char meta_fname[64]; - sprintf(meta_fname, "dragonfly-router-sampling.meta"); - FILE * fp = fopen(meta_fname, "w"); - fprintf(fp, "Router sample struct format: \nrouter_id (tw_lpid) \nbusy time for each of the %d links (double) \n" - "link traffic for each of the %d links (int64_t) \nsample end time (double) forward events per sample \nreverse events per sample ", - p->radix, p->radix); - // fprintf(fp, "\n\nOrdering of links \n%d green (router-router same row) channels \n %d black (router-router same column) channels \n %d global (router-router remote group)" - // " channels \n %d terminal channels", p->num_router_cols * p->num_row_chans, p->num_router_rows * p->num_col_chans, p->num_global_channels, p->num_cn); - fclose(fp); + if(cur_entry->msg.packet_ID == LLU(TRACK_PKT) && lp->gid == T_ID) + printf("\n Packet %llu generated at terminal %d dest %llu size %llu num chunks %llu router-id %d %llu", + 
cur_entry->msg.packet_ID, s->terminal_id, LLU(cur_entry->msg.dest_terminal_lpid), + LLU(cur_entry->msg.packet_size), LLU(num_chunks), s->router_id, LLU(router_id)); + + if(cur_entry->msg.chunk_id == num_chunks - 1 && (cur_entry->msg.local_event_size_bytes > 0)) + { + msg->num_cll++; + tw_stime local_ts = codes_local_latency(lp); + tw_event *e_new = tw_event_new(cur_entry->msg.sender_lp, local_ts, lp); + void * m_new = tw_event_data(e_new); + void *local_event = (char*)cur_entry->event_data + + cur_entry->msg.remote_event_size_bytes; + memcpy(m_new, local_event, cur_entry->msg.local_event_size_bytes); + tw_event_send(e_new); } - char rt_fn[MAX_NAME_LENGTH]; - if(strcmp(router_sample_file, "") == 0) - sprintf(rt_fn, "dragonfly-router-sampling-%ld.bin", g_tw_mynode); - else - sprintf(rt_fn, "%s-%ld.bin", router_sample_file, g_tw_mynode); - - int i = 0; + + s->vc_occupancy[vcg] += s->params->chunk_size; + cur_entry = return_head(s->terminal_msgs, s->terminal_msgs_tail, vcg); + rc_stack_push(lp, cur_entry, delete_terminal_dally_message_list, s->st); + s->terminal_length[vcg] -= s->params->chunk_size; - int size_sample = sizeof(tw_lpid) + p->radix * (sizeof(int64_t) + sizeof(tw_stime)) + sizeof(tw_stime) + 2 * sizeof(long); - FILE * fp = fopen(rt_fn, "a"); - fseek(fp, sample_rtr_bytes_written, SEEK_SET); + int next_vcg = 0; - for(; i < s->op_arr_size; i++) + if(num_qos_levels > 1) //I think this one is OK since the default is that terminals have only 1 VC anyway so leaving vcg as + next_vcg = get_next_vcg(s, bf, msg, lp); + + cur_entry = NULL; + if(next_vcg >= 0) + cur_entry = s->terminal_msgs[next_vcg]; + + /* if there is another packet inline then schedule another send event */ + if(cur_entry != NULL && s->vc_occupancy[next_vcg] + s->params->chunk_size <= s->params->cn_vc_size) { + terminal_dally_message *m_new; + msg->num_rngs++; + ts += tw_rand_unif(lp->rng); + e = model_net_method_event_new(lp->gid, ts, lp, DRAGONFLY_DALLY, (void**)&m_new, NULL); + m_new->type = 
T_SEND; + m_new->magic = terminal_magic_num; + tw_event_send(e); + } else { + /* If not then the LP will wait for another credit or packet generation */ + bf->c4 = 1; + s->in_send_loop = 0; + } + if(s->issueIdle) { + bf->c5 = 1; + s->issueIdle = 0; + msg->num_rngs++; + ts += tw_rand_unif(lp->rng); + model_net_method_idle_event(ts, 0, lp); + + if(s->last_buf_full > 0.0) { - fwrite((void*)&(s->rsamples[i].router_id), sizeof(tw_lpid), 1, fp); - fwrite(s->rsamples[i].busy_time, sizeof(tw_stime), p->radix, fp); - fwrite(s->rsamples[i].link_traffic_sample, sizeof(int64_t), p->radix, fp); - fwrite((void*)&(s->rsamples[i].end_time), sizeof(tw_stime), 1, fp); - fwrite((void*)&(s->rsamples[i].fwd_events), sizeof(long), 1, fp); - fwrite((void*)&(s->rsamples[i].rev_events), sizeof(long), 1, fp); + bf->c6 = 1; + msg->saved_total_time = s->busy_time; + msg->saved_busy_time = s->last_buf_full; + msg->saved_sample_time = s->busy_time_sample; + + s->busy_time += (tw_now(lp) - s->last_buf_full); + s->busy_time_sample += (tw_now(lp) - s->last_buf_full); + s->ross_sample.busy_time_sample += (tw_now(lp) - s->last_buf_full); + msg->saved_busy_time_ross = s->busy_time_ross_sample; + s->busy_time_ross_sample += (tw_now(lp) - s->last_buf_full); + s->last_buf_full = 0.0; } - sample_rtr_bytes_written += (s->op_arr_size * size_sample); - fclose(fp); + } + return; } -void dragonfly_dally_sample_init(terminal_state * s, - tw_lp * lp) + +//used by packet_arrive() +static void send_remote_event(terminal_state * s, terminal_dally_message * msg, tw_lp * lp, tw_bf * bf, char * event_data, int remote_event_size) { - (void)lp; - s->fin_chunks_sample = 0; - s->data_size_sample = 0; - s->fin_hops_sample = 0; - s->fin_chunks_time = 0; - s->busy_time_sample = 0; + void * tmp_ptr = model_net_method_get_edata(DRAGONFLY_DALLY, msg); + + msg->num_rngs++; + tw_stime ts = g_tw_lookahead + mpi_soft_overhead + tw_rand_unif(lp->rng); - s->op_arr_size = 0; - s->max_arr_size = MAX_STATS; + if (msg->is_pull){ + 
bf->c4 = 1; + struct codes_mctx mc_dst = + codes_mctx_set_global_direct(msg->sender_mn_lp); + struct codes_mctx mc_src = + codes_mctx_set_global_direct(lp->gid); + int net_id = model_net_get_id(LP_METHOD_NM_TERM); - s->sample_stat = (dfly_cn_sample *)calloc(MAX_STATS, sizeof(struct dfly_cn_sample)); - + model_net_set_msg_param(MN_MSG_PARAM_START_TIME, MN_MSG_PARAM_START_TIME_VAL, &(msg->msg_start_time)); + + msg->event_rc = model_net_event_mctx(net_id, &mc_src, &mc_dst, msg->category, + msg->sender_lp, msg->pull_size, ts, + remote_event_size, tmp_ptr, 0, NULL, lp); + } + else{ + tw_event * e = tw_event_new(msg->final_dest_gid, ts, lp); + void * m_remote = tw_event_data(e); + memcpy(m_remote, event_data, remote_event_size); + tw_event_send(e); + } + return; } -void dragonfly_dally_sample_rc_fn(terminal_state * s, - tw_bf * bf, - terminal_dally_message * msg, - tw_lp * lp) -{ - (void)lp; - (void)bf; - (void)msg; - s->op_arr_size--; - int cur_indx = s->op_arr_size; - struct dfly_cn_sample stat = s->sample_stat[cur_indx]; - s->busy_time_sample = stat.busy_time_sample; - s->fin_chunks_time = stat.fin_chunks_time; - s->fin_hops_sample = stat.fin_hops_sample; - s->data_size_sample = stat.data_size_sample; - s->fin_chunks_sample = stat.fin_chunks_sample; - s->fwd_events = stat.fwd_events; - s->rev_events = stat.rev_events; +static void packet_arrive_rc(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) +{ - stat.busy_time_sample = 0; - stat.fin_chunks_time = 0; - stat.fin_hops_sample = 0; - stat.data_size_sample = 0; - stat.fin_chunks_sample = 0; - stat.end_time = 0; - stat.terminal_id = 0; - stat.fwd_events = 0; - stat.rev_events = 0; -} + for(int i = 0; i < msg->num_rngs; i++) + tw_rand_reverse_unif(lp->rng); -void dragonfly_dally_sample_fn(terminal_state * s, - tw_bf * bf, - terminal_dally_message * msg, - tw_lp * lp) -{ - (void)lp; - (void)msg; - (void)bf; + for(int i = 0; i < msg->num_cll; i++) + codes_local_latency_reverse(lp); - 
if(s->op_arr_size >= s->max_arr_size) + if(bf->c31) { - /* In the worst case, copy array to a new memory location, its very - * expensive operation though */ - struct dfly_cn_sample * tmp = (dfly_cn_sample *)calloc((MAX_STATS + s->max_arr_size), sizeof(struct dfly_cn_sample)); - memcpy(tmp, s->sample_stat, s->op_arr_size * sizeof(struct dfly_cn_sample)); - free(s->sample_stat); - s->sample_stat = tmp; - s->max_arr_size += MAX_STATS; + s->packet_fin--; + packet_fin--; } - - int cur_indx = s->op_arr_size; - s->sample_stat[cur_indx].terminal_id = s->terminal_id; - s->sample_stat[cur_indx].fin_chunks_sample = s->fin_chunks_sample; - s->sample_stat[cur_indx].data_size_sample = s->data_size_sample; - s->sample_stat[cur_indx].fin_hops_sample = s->fin_hops_sample; - s->sample_stat[cur_indx].fin_chunks_time = s->fin_chunks_time; - s->sample_stat[cur_indx].busy_time_sample = s->busy_time_sample; - s->sample_stat[cur_indx].end_time = tw_now(lp); - s->sample_stat[cur_indx].fwd_events = s->fwd_events; - s->sample_stat[cur_indx].rev_events = s->rev_events; + if(msg->path_type == MINIMAL) + minimal_count--; + if(msg->path_type == NON_MINIMAL) + nonmin_count--; - s->op_arr_size++; - s->fin_chunks_sample = 0; - s->data_size_sample = 0; - s->fin_hops_sample = 0; - s->fwd_events = 0; - s->rev_events = 0; - s->fin_chunks_time = 0; - s->busy_time_sample = 0; -} + N_finished_chunks--; + s->finished_chunks--; + s->fin_chunks_sample--; + s->ross_sample.fin_chunks_sample--; + s->fin_chunks_ross_sample--; -void dragonfly_dally_sample_fin(terminal_state * s, - tw_lp * lp) -{ - (void)lp; - - if(!g_tw_mynode) - { + total_hops -= msg->my_N_hop; + s->total_hops -= msg->my_N_hop; + s->fin_hops_sample -= msg->my_N_hop; + s->ross_sample.fin_hops_sample -= msg->my_N_hop; + s->fin_hops_ross_sample -= msg->my_N_hop; + s->fin_chunks_time = msg->saved_sample_time; + s->ross_sample.fin_chunks_time = msg->saved_sample_time; + s->fin_chunks_time_ross_sample = msg->saved_fin_chunks_ross; + s->total_time = 
msg->saved_avg_time; - /* write metadata file */ - char meta_fname[64]; - sprintf(meta_fname, "dragonfly-cn-sampling.meta"); + struct qhash_head * hash_link = NULL; + struct dfly_qhash_entry * tmp = NULL; + + struct dfly_hash_key key; + key.message_id = msg->message_id; + key.sender_id = msg->sender_lp; + + hash_link = qhash_search(s->rank_tbl, &key); + tmp = qhash_entry(hash_link, struct dfly_qhash_entry, hash_link); + + mn_stats* stat; + stat = model_net_find_stats(msg->category, s->dragonfly_stats_array); + stat->recv_time = msg->saved_rcv_time; - FILE * fp = fopen(meta_fname, "w"); - fprintf(fp, "Compute node sample format\nterminal_id (tw_lpid) \nfinished chunks (long)" - "\ndata size per sample (long) \nfinished hops (double) \ntime to finish chunks (double)" - "\nbusy time (double)\nsample end time(double) \nforward events (long) \nreverse events (long)"); - fclose(fp); + if(bf->c1) + { + stat->recv_count--; + stat->recv_bytes -= msg->packet_size; + N_finished_packets--; + s->finished_packets--; } - - char rt_fn[MAX_NAME_LENGTH]; - if(strncmp(cn_sample_file, "", 10) == 0) - sprintf(rt_fn, "dragonfly-cn-sampling-%ld.bin", g_tw_mynode); - else - sprintf(rt_fn, "%s-%ld.bin", cn_sample_file, g_tw_mynode); - - FILE * fp = fopen(rt_fn, "a"); - fseek(fp, sample_bytes_written, SEEK_SET); - fwrite(s->sample_stat, sizeof(struct dfly_cn_sample), s->op_arr_size, fp); - fclose(fp); - - sample_bytes_written += (s->op_arr_size * sizeof(struct dfly_cn_sample)); -} - -static void terminal_buf_update_rc(terminal_state * s, - tw_bf * bf, - terminal_dally_message * msg, - tw_lp * lp) -{ - int vcg = 0; - int num_qos_levels = s->params->num_qos_levels; - - for(int i = 0; i < msg->num_cll; i++) - codes_local_latency_reverse(lp); - - if(num_qos_levels > 1) - vcg = get_vcg_from_category(msg); - s->vc_occupancy[vcg] += s->params->chunk_size; - if(bf->c1) { - s->in_send_loop = 0; - } - - return; -} -/* update the compute node-router channel buffer */ -static void 
-terminal_buf_update(terminal_state * s, - tw_bf * bf, - terminal_dally_message * msg, - tw_lp * lp) -{ - msg->num_cll = 0; - msg->num_rngs = 0; + if(bf->c22) + { + s->max_latency = msg->saved_available_time; + } + if(bf->c7) + { + //assert(!hash_link); + if(bf->c4) + model_net_event_rc2(lp, &msg->event_rc); + + N_finished_msgs--; + s->finished_msgs--; + total_msg_sz -= msg->total_size; + s->total_msg_size -= msg->total_size; + s->data_size_sample -= msg->total_size; + s->ross_sample.data_size_sample -= msg->total_size; + s->data_size_ross_sample -= msg->total_size; - bf->c1 = 0; - bf->c2 = 0; - bf->c3 = 0; - int vcg = 0; + struct dfly_qhash_entry * d_entry_pop = (dfly_qhash_entry *)rc_stack_pop(s->st); + qhash_add(s->rank_tbl, &key, &(d_entry_pop->hash_link)); + s->rank_tbl_pop++; - int num_qos_levels = s->params->num_qos_levels; + if(s->rank_tbl_pop >= DFLY_HASH_TABLE_SIZE) + tw_error(TW_LOC, "\n Exceeded allocated qhash size, increase hash size in dragonfly model"); - if(num_qos_levels > 1) - vcg = get_vcg_from_category(msg); + hash_link = &(d_entry_pop->hash_link); + tmp = d_entry_pop; - msg->num_cll++; - tw_stime ts = codes_local_latency(lp); - s->vc_occupancy[vcg] -= s->params->chunk_size; - - if(s->in_send_loop == 0 && s->terminal_msgs[vcg] != NULL) { - terminal_dally_message *m; - bf->c1 = 1; - tw_event* e = model_net_method_event_new(lp->gid, ts, lp, DRAGONFLY_DALLY, - (void**)&m, NULL); - m->type = T_SEND; - m->magic = terminal_magic_num; - s->in_send_loop = 1; - tw_event_send(e); } + + assert(tmp); + tmp->num_chunks--; + + if(bf->c5) + { + qhash_del(hash_link); + free_tmp(tmp); + s->rank_tbl_pop--; + } + return; } -void -dragonfly_dally_terminal_final( terminal_state * s, - tw_lp * lp ) +/* packet arrives at the destination terminal */ +static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) { - // printf("terminal id %d\n",s->terminal_id); - dragonfly_total_time += s->total_time; //increment the PE level time 
counter - model_net_print_stats(lp->gid, s->dragonfly_stats_array); - int written = 0; - - if(s->terminal_id == 0) + if (msg->my_N_hop > s->params->max_hops_notify) { - written += sprintf(s->output_buf + written, "# Format < dest_type> \n"); -// fprintf(fp, "# Format <# Flits/Packets finished> \n"); + printf("Terminal received a packet with %d hops! (Notify on > than %d)\n",msg->my_N_hop, s->params->max_hops_notify); } - written += sprintf(s->output_buf + written, "\n%u %s %llu %s %s %llu %lf %d", - s->terminal_id, "T", s->router_id, "R", "CN", LLU(s->total_msg_size), s->busy_time, s->stalled_chunks); - lp_io_write(lp->gid, (char*)"dragonfly-link-stats", written, s->output_buf); - - // if(s->terminal_id == 0) - // { - // //fclose(dragonfly_term_bw_log); - // char meta_filename[128]; - // sprintf(meta_filename, "dragonfly-cn-stats.meta"); - - // FILE * fp = NULL; - // fp = fopen(meta_filename, "w"); - // if(fp) - // fprintf(fp, "# Format <# Flits/Packets finished> \n"); - // fclose(fp); - // } - - written = 0; - if(s->terminal_id == 0) - { - written += sprintf(s->output_buf2 + written, "# Format <# Packets finished> \n"); - } - written += sprintf(s->output_buf2 + written, "%llu %llu %llu %llu %lf %lf %lf %llu %lf %lf\n", - lp->gid, s->terminal_id, s->total_gen_size, s->total_msg_size, s->total_time/s->finished_chunks, s->max_latency, s->min_latency, - s->finished_packets, (double)s->total_hops/s->finished_chunks), s->busy_time; - - if(s->terminal_msgs[0] != NULL) - printf("[%llu] leftover terminal messages \n", LLU(lp->gid)); - lp_io_write(lp->gid, (char*)"dragonfly-cn-stats", written, s->output_buf2); + // NIC aggregation - should this be a separate function? 
+ // Trigger an event on receiving server + msg->num_rngs = 0; + msg->num_cll = 0; - //if(s->packet_gen != s->packet_fin) - // printf("\n generated %d finished %d ", s->packet_gen, s->packet_fin); - - if(s->rank_tbl) - qhash_finalize(s->rank_tbl); + if(!s->rank_tbl) + s->rank_tbl = qhash_init(dragonfly_rank_hash_compare, dragonfly_hash_func, DFLY_HASH_TABLE_SIZE); - rc_stack_destroy(s->st); - free(s->vc_occupancy); - free(s->terminal_msgs); - free(s->terminal_msgs_tail); -} + struct dfly_hash_key key; + key.message_id = msg->message_id; + key.sender_id = msg->sender_lp; + + struct qhash_head *hash_link = NULL; + struct dfly_qhash_entry * tmp = NULL; + + hash_link = qhash_search(s->rank_tbl, &key); + + if(hash_link) + tmp = qhash_entry(hash_link, struct dfly_qhash_entry, hash_link); -void dragonfly_dally_router_final(router_state * s, tw_lp * lp) -{ - free(s->global_channel); - int i, j; - for(i = 0; i < s->params->radix; i++) { - for(j = 0; j < s->params->num_vcs; j++) { - if(s->queued_msgs[i][j] != NULL) { - printf("[%llu] leftover queued messages %d %d %d\n", LLU(lp->gid), i, j, - s->vc_occupancy[i][j]); - } - if(s->pending_msgs[i][j] != NULL) { - printf("[%llu] lefover pending messages %d %d\n", LLU(lp->gid), i, j); - } - } - } + uint64_t total_chunks = msg->total_size / s->params->chunk_size; - if(s->router_id == 0) - fclose(dragonfly_rtr_bw_log); + if(msg->total_size % s->params->chunk_size) + total_chunks++; - rc_stack_destroy(s->st); - - const dragonfly_param *p = s->params; - int written = 0; - int src_rel_id = s->router_id % p->num_routers; - int local_grp_id = s->router_id / p->num_routers; - for(int d = 0; d <= p->intra_grp_radix; d++) + if(!total_chunks) + total_chunks = 1; + + /*if(tmp) { - if(d != src_rel_id) + if(tmp->num_chunks >= total_chunks || tmp->num_chunks < 0) { - int dest_ab_id = local_grp_id * p->num_routers + d; - written += sprintf(s->output_buf + written, "\n%d %s %d %s %s %llu %lf %lu", - s->router_id, - "R", - dest_ab_id, - "R", - "L", 
- s->link_traffic[d], - s->busy_time[d], - s->stalled_chunks[d]); + //tw_output(lp, "\n invalid number of chunks %d for LP %ld ", tmp->num_chunks, lp->gid); + tw_lp_suspend(lp, 0, 0); + return; } - } - - vector< Connection > my_global_links = s->connMan->get_connections_by_type(CONN_GLOBAL); - vector< Connection >::iterator it = my_global_links.begin(); + }*/ + assert(lp->gid == msg->dest_terminal_lpid); - for(; it != my_global_links.end(); it++) - { - int dest_rtr_id = it->dest_gid; - int port_no = it->port; - assert(port_no >= 0 && port_no < p->radix); - written += sprintf(s->output_buf + written, "\n%d %s %d %s %s %llu %lf %lu", - s->router_id, - "R", - dest_rtr_id, - "R", - "G", - s->link_traffic[port_no], - s->busy_time[port_no], - s->stalled_chunks[port_no]); - } + if(msg->packet_ID == LLU(TRACK_PKT) && msg->src_terminal_id == T_ID) + printf("\n Packet %llu arrived at lp %llu hops %d ", LLU(msg->sender_lp), LLU(lp->gid), msg->my_N_hop); + + msg->num_rngs++; + tw_stime ts = g_tw_lookahead + s->params->cn_credit_delay + tw_rand_unif(lp->rng); - sprintf(s->output_buf + written, "\n"); - lp_io_write(lp->gid, (char*)"dragonfly-link-stats", written, s->output_buf); + // no method_event here - message going to router + tw_event * buf_e; + terminal_dally_message * buf_msg; + buf_e = model_net_method_event_new(msg->intm_lp_id, ts, lp, + DRAGONFLY_DALLY_ROUTER, (void**)&buf_msg, NULL); + buf_msg->magic = router_magic_num; + buf_msg->vc_index = msg->vc_index; + buf_msg->output_chan = msg->output_chan; + buf_msg->type = R_BUFFER; + tw_event_send(buf_e); - /*if(!s->router_id) - { - written = sprintf(s->output_buf, "# Format "); - written += sprintf(s->output_buf + written, "# Router ports in the order: %d green links, %d black links %d global channels \n", - p->num_router_cols * p->num_row_chans, p->num_router_rows * p->num_col_chans, p->num_global_channels); - } - written += sprintf(s->output_buf2 + written, "\n %llu %d %d", - LLU(lp->gid), - s->router_id / 
p->num_routers, - s->router_id % p->num_routers); + bf->c1 = 0; + bf->c3 = 0; + bf->c4 = 0; + bf->c7 = 0; - for(int d = 0; d < p->radix; d++) - written += sprintf(s->output_buf2 + written, " %lld", LLD(s->link_traffic[d])); + /* Total overall finished chunks in simulation */ + N_finished_chunks++; + /* Finished chunks on a LP basis */ + s->finished_chunks++; + /* Finished chunks per sample */ + s->fin_chunks_sample++; + s->ross_sample.fin_chunks_sample++; + s->fin_chunks_ross_sample++; - lp_io_write(lp->gid, (char*)"dragonfly-router-traffic", written, s->output_buf2); - */ - if (!g_tw_mynode) { - if (s->router_id == 0) { - if (PRINT_CONFIG) - dragonfly_print_params(s->params); - } - } -} + /* WE do not allow self messages through dragonfly */ + assert(lp->gid != msg->src_terminal_id); -static int dfdally_score_connection(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp, Connection conn, conn_minimality_t c_minimality) -{ - int score = 0; - int port = conn.port; + uint64_t num_chunks = msg->packet_size / s->params->chunk_size; + if (msg->packet_size < s->params->chunk_size) + num_chunks++; - if (port == -1) { - return INT_MAX; - } + if(msg->path_type == MINIMAL) + minimal_count++; - switch (scoring) { - case ALPHA: //considers vc occupancy and queued count only - for(int k=0; k < s->params->num_vcs; k++) - { - score += s->vc_occupancy[port][k]; - } - score += s->queued_count[port]; - break; - case BETA: //considers vc occupancy and queued count multiplied by the number of minimal hops to destination from the potential next stop - tw_error(TW_LOC, "Beta scoring not implemented"); - break; - case GAMMA: //delta scoring but higher is better - tw_error(TW_LOC, "Gamma scoring not implemented"); - break; - case DELTA: //alpha but biased 2:1 toward minimal - for(int k=0; k < s->params->num_vcs; k++) - { - score += s->vc_occupancy[port][k]; - } - score += s->queued_count[port]; + if(msg->path_type == NON_MINIMAL) + nonmin_count++; - if (c_minimality != 
C_MIN) - score = score * 2; - break; - default: - tw_error(TW_LOC, "Unsupported Scoring Protocol Error\n"); + if(msg->chunk_id == num_chunks - 1) + { + bf->c31 = 1; + s->packet_fin++; + packet_fin++; } - return score; -} + if(msg->path_type != MINIMAL && msg->path_type != NON_MINIMAL) + printf("\n Wrong message path type %d ", msg->path_type); -static void dfdally_select_intermediate_group(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp, int fdest_router_id) -{ - int fdest_group_id = fdest_router_id / s->params->num_routers; - int origin_group_id = msg->origin_router_id / s->params->num_routers; + /* save the sample time */ + msg->saved_sample_time = s->fin_chunks_time; + s->fin_chunks_time += (tw_now(lp) - msg->travel_start_time); + s->ross_sample.fin_chunks_time += (tw_now(lp) - msg->travel_start_time); + msg->saved_fin_chunks_ross = s->fin_chunks_time_ross_sample; + s->fin_chunks_time_ross_sample += (tw_now(lp) - msg->travel_start_time); + + /* save the total time per LP */ + msg->saved_avg_time = s->total_time; + s->total_time += (tw_now(lp) - msg->travel_start_time); + total_hops += msg->my_N_hop; + s->total_hops += msg->my_N_hop; + s->fin_hops_sample += msg->my_N_hop; + s->ross_sample.fin_hops_sample += msg->my_N_hop; + s->fin_hops_ross_sample += msg->my_N_hop; - // Has an intermediate group been chosen yet? 
(Should happen at first router) - if (msg->intm_grp_id == -1) { // Intermediate group hasn't been chosen yet, choose one randomly and route toward it - assert(s->router_id == msg->origin_router_id); - msg->num_rngs++; - int rand_group_id; - if (NONMIN_INCLUDE_SOURCE_DEST) //then any group is a valid intermediate group - rand_group_id = tw_rand_integer(lp->rng, 0, s->params->num_groups-1); - else { //then we don't consider source or dest groups as valid intermediate groups - vector group_list; - for (int i = 0; i < s->params->num_groups; i++) - { - if ((i != origin_group_id) && (i != fdest_group_id)) { - group_list.push_back(i); - } - } - int rand_sel = tw_rand_integer(lp->rng, 0, group_list.size()-1); - rand_group_id = group_list[rand_sel]; - } - msg->intm_grp_id = rand_group_id; - } - else { //the only time that it is re-set is when a router didn't have a direct connection to the intermediate group but had no other options - // so we need to pick an intm group that the current router DOES have a connection to. 
- assert(s->router_id != msg->origin_router_id); + mn_stats* stat = model_net_find_stats(msg->category, s->dragonfly_stats_array); + msg->saved_rcv_time = stat->recv_time; + stat->recv_time += (tw_now(lp) - msg->travel_start_time); - set< int > valid_intm_groups; - vector< Connection > global_conns = s->connMan->get_connections_by_type(CONN_GLOBAL); - for (Connection conn : global_conns) { - if (NONMIN_INCLUDE_SOURCE_DEST) //then any group I connect to is valid - { - valid_intm_groups.insert(conn.dest_group_id); - } - else - { - if ((conn.dest_group_id != fdest_group_id) && (conn.dest_group_id != origin_group_id)) - valid_intm_groups.insert(conn.dest_group_id); - } - } +#if DEBUG == 1 + if( msg->packet_ID == TRACK + && msg->chunk_id == num_chunks-1 + && msg->message_id == TRACK_MSG) + { + printf( "(%lf) [Terminal %d] packet %lld has arrived \n", + tw_now(lp), (int)lp->gid, msg->packet_ID); - int rand_sel = tw_rand_integer(lp->rng, 0, valid_intm_groups.size()-1); - msg->num_rngs++; - set< int >::iterator it = valid_intm_groups.begin(); - advance(it, rand_sel); //you can't just use [] to access a set - msg->intm_grp_id = *it; - } -} + printf("travel start time is %f\n", + msg->travel_start_time); -//Now returns random selection from tied best connections. 
-static Connection get_absolute_best_connection_from_conns(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp, vector< Connection > conns) -{ - if (conns.size() == 0) { //passed no connections to this but we got to return something - return negative filled conn to force a break if not caught - Connection bad_conn; - bad_conn.src_gid = -1; - bad_conn.port = -1; - return bad_conn; - } - if (conns.size() == 1) { //no need to compare singular connection - return conns[0]; + printf("My hop now is %d\n",msg->my_N_hop); } +#endif - int num_to_compare = conns.size(); - int scores[num_to_compare]; - vector < Connection > best_conns; - int best_score = INT_MAX; + /* Now retreieve the number of chunks completed from the hash and update + * them */ + void *m_data_src = model_net_method_get_edata(DRAGONFLY_DALLY, msg); - for(int i = 0; i < num_to_compare; i++) + /* If an entry does not exist then create one */ + if(!tmp) { - scores[i] = dfdally_score_connection(s, bf, msg, lp, conns[i], C_MIN); - if (scores[i] <= best_score) { - if (scores[i] < best_score) { - best_score = scores[i]; - best_conns.clear(); - best_conns.push_back(conns[i]); - } - else { - best_conns.push_back(conns[i]); - } - } - } - - assert(best_conns.size() > 0); - - msg->num_rngs++; - return best_conns[tw_rand_integer(lp->rng, 0, best_conns.size()-1)]; -} - -//when using this function, you should assume that the self router is NOT the destination. That should be handled elsewhere. 
-static vector< Connection > get_legal_minimal_stops(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp, int fdest_router_id) -{ - int my_router_id = s->router_id; - int my_group_id = s->group_id; - int origin_group_id = msg->origin_router_id / s->params->num_routers; - int fdest_group_id = fdest_router_id / s->params->num_routers; - - if (my_group_id != fdest_group_id) { //we're in origin group or intermediate group - either way we need to route to fdest group minimally - vector< Connection > conns_to_dest_group = s->connMan->get_connections_to_group(fdest_group_id); - if (conns_to_dest_group.size() > 0) { //then we have a direct connection to dest group - return conns_to_dest_group; // --------- return direct connection - } - else { //we don't have a direct connection to group and need list of routers in our group that do - vector< Connection > poss_next_conns_to_group; - set< int > poss_router_id_set_to_group; //TODO this might be a source of non-determinism(?) - for(int i = 0; i < connectionList[my_group_id][fdest_group_id].size(); i++) - { - int poss_router_id = connectionList[my_group_id][fdest_group_id][i]; - if (poss_router_id_set_to_group.count(poss_router_id) == 0) { //we only want to consider a single router id once (we look at all connections to it using the conn man) - vector< Connection > conns = s->connMan->get_connections_to_gid(poss_router_id, CONN_LOCAL); - poss_router_id_set_to_group.insert(poss_router_id); - poss_next_conns_to_group.insert(poss_next_conns_to_group.end(), conns.begin(), conns.end()); - } - } - return poss_next_conns_to_group; // --------- return non-direct connection (still minimal though) - } + bf->c5 = 1; + struct dfly_qhash_entry * d_entry = (dfly_qhash_entry *)calloc(1, sizeof (struct dfly_qhash_entry)); + d_entry->num_chunks = 0; + d_entry->key = key; + d_entry->remote_event_data = NULL; + d_entry->remote_event_size = 0; + qhash_add(s->rank_tbl, &key, &(d_entry->hash_link)); + s->rank_tbl_pop++; + + 
if(s->rank_tbl_pop >= DFLY_HASH_TABLE_SIZE) + tw_error(TW_LOC, "\n Exceeded allocated qhash size, increase hash size in dragonfly model"); + + hash_link = &(d_entry->hash_link); + tmp = d_entry; } - else { //then we're in the final destination group, also we assume that we're not the fdest router - assert(my_group_id == fdest_group_id); - assert(my_router_id != fdest_router_id); //this should be handled outside of this function + + assert(tmp); + tmp->num_chunks++; - vector< Connection > conns_to_fdest_router = s->connMan->get_connections_to_gid(fdest_router_id, CONN_LOCAL); - return conns_to_fdest_router; + if(msg->chunk_id == num_chunks - 1) + { + bf->c1 = 1; + stat->recv_count++; + stat->recv_bytes += msg->packet_size; + + N_finished_packets++; + s->finished_packets++; } -} -//Note that this is different than Dragonfly Plus's implementation, this isn't the converse of minimal, these are any -//connections that could lead to the intermediate group or a new one if necessary -static vector< Connection > get_legal_nonminimal_stops(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp, int fdest_router_id) -{ - int my_router_id = s->router_id; - int my_group_id = s->group_id; - int origin_group_id = msg->origin_router_id / s->params->num_routers; - int fdest_group_id = fdest_router_id / s->params->num_routers; - bool in_intermediate_group = (my_group_id != origin_group_id) && (my_group_id != fdest_group_id); - int preset_intm_group_id = msg->intm_grp_id; + /* if its the last chunk of the packet then handle the remote event data */ + if(msg->remote_event_size_bytes > 0 && !tmp->remote_event_data) + { + /* Retreive the remote event entry */ + tmp->remote_event_data = (char*)calloc(1, msg->remote_event_size_bytes); + assert(tmp->remote_event_data); + tmp->remote_event_size = msg->remote_event_size_bytes; + memcpy(tmp->remote_event_data, m_data_src, msg->remote_event_size_bytes); + } + + if(s->min_latency > tw_now(lp) - msg->travel_start_time) { + 
s->min_latency = tw_now(lp) - msg->travel_start_time; + } + if(s->max_latency < tw_now( lp ) - msg->travel_start_time) { + bf->c22 = 1; + msg->saved_available_time = s->max_latency; + s->max_latency = tw_now(lp) - msg->travel_start_time; + } + /* If all chunks of a message have arrived then send a remote event to the + * callee*/ + //assert(tmp->num_chunks <= total_chunks); - if (my_group_id == origin_group_id) { - vector< Connection > conns_to_intm_group = s->connMan->get_connections_to_group(preset_intm_group_id); + if(tmp->num_chunks >= total_chunks) + { + bf->c7 = 1; - //are we the originating router - if (my_router_id == msg->origin_router_id) { //then we are able to route within our own group if necessary - // Do we have direct connection to intermediate group? - if (conns_to_intm_group.size() > 0) { //yes - return conns_to_intm_group; - } - else { //no - route within group to router that DOES have a connection to intm group - vector connecting_router_ids = connectionList[my_group_id][preset_intm_group_id]; - vector< Connection > conns_to_connecting_routers; - for (int i = 0; i < connecting_router_ids.size(); i++) - { - int poss_router_id = connecting_router_ids[i]; - vector< Connection > candidate_conns = s->connMan->get_connections_to_gid(poss_router_id, CONN_LOCAL); - conns_to_connecting_routers.insert(conns_to_connecting_routers.end(), candidate_conns.begin(), candidate_conns.end()); - } - return conns_to_connecting_routers; - } - } - else { //then we can't afford to reroute within our group, we must route to the int group if possible - pick a new one if not - if (conns_to_intm_group.size() > 0) { - return conns_to_intm_group; //route there directly - } - else { //pick a new one! 
- dfdally_select_intermediate_group(s, bf, msg, lp, fdest_router_id); - conns_to_intm_group = s->connMan->get_connections_to_group(msg->intm_grp_id); //new intm group id - return conns_to_intm_group; - } + s->data_size_sample += msg->total_size; + s->ross_sample.data_size_sample += msg->total_size; + s->data_size_ross_sample += msg->total_size; + N_finished_msgs++; + total_msg_sz += msg->total_size; + s->total_msg_size += msg->total_size; + s->finished_msgs++; + + //assert(tmp->remote_event_data && tmp->remote_event_size > 0); + if(tmp->remote_event_data && tmp->remote_event_size > 0) { + send_remote_event(s, msg, lp, bf, tmp->remote_event_data, tmp->remote_event_size); } - } - else if (in_intermediate_group) { - //if we're in the intermediate group then we're just going to default to routing minimally, return an empty vector. - vector< Connection > empty; - return empty; - } - else if (my_group_id == fdest_group_id) - { - //same as intermediate, force minimal choices - vector< Connection > empty; - return empty; - } + /* Remove the hash entry */ + qhash_del(hash_link); + rc_stack_push(lp, tmp, free_tmp, s->st); + s->rank_tbl_pop--; + } + return; } -static Connection dfdally_minimal_routing(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp, int fdest_router_id) +static void terminal_buf_update_rc(terminal_state * s, + tw_bf * bf, + terminal_dally_message * msg, + tw_lp * lp) { - vector< Connection > poss_next_stops = get_legal_minimal_stops(s, bf, msg, lp, fdest_router_id); - if (poss_next_stops.size() < 1) - tw_error(TW_LOC, "MINIMAL DEAD END\n"); + int vcg = 0; + int num_qos_levels = s->params->num_qos_levels; - ConnectionType conn_type = poss_next_stops[0].conn_type; //TODO this assumes that all possible next stops are of same type - OK for now, but remember this - if (conn_type == CONN_GLOBAL) { //TOOD should we really only randomize global and not local? should we really do light adaptive for nonglobal? 
- msg->num_rngs++; - int rand_sel = tw_rand_integer(lp->rng, 0, poss_next_stops.size() - 1); - return poss_next_stops[rand_sel]; - } - else - { - Connection best_min_conn = get_absolute_best_connection_from_conns(s, bf, msg, lp, poss_next_stops); - return best_min_conn; + for(int i = 0; i < msg->num_cll; i++) + codes_local_latency_reverse(lp); + + if(num_qos_levels > 1) + vcg = get_vcg_from_category(msg); + + s->vc_occupancy[vcg] += s->params->chunk_size; + if(bf->c1) { + s->in_send_loop = 0; } -} -// Coloquially: "Valiant Group Routing" -// This follows the randomized indirect routing algorithm detailed in "Cost-Efficient Dragonfly topology for Large-Scale Systems" and -// "Technology-Driven, Highly-Scalable Dragonfly Topology" by Kim, Dally, Scott, and Abts -// They sourced it from "A scheme for fast parallel communication" by L.G. Valiant -// It differs from true valiant routing in that it randomly selects a GROUP and routes to it - not a random intermediate router -static Connection dfdally_nonminimal_routing(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp, int fdest_router_id) + return; +} +/* update the compute node-router channel buffer */ +static void terminal_buf_update(terminal_state * s, + tw_bf * bf, + terminal_dally_message * msg, + tw_lp * lp) { - int my_router_id = s->router_id; - int my_group_id = s->group_id; - int fdest_group_id = fdest_router_id / s->params->num_routers; - int origin_group_id = msg->origin_router_id / s->params->num_routers; - - assert(msg->intm_grp_id != -1); // This needs to have already been set. - // //The setting of intm_grp_id is kept out of this function so that other routing algorithms can utilize this routing function - // //and set it themselves based on the context. 
This helps avoid an instance like: prog-adaptive routing, second router in path decides - // //to take non-minimal routing but the pre-selected intermediate group ID isn't accessible to it and would require re-routing and - // //thus take extra hops. Possible illegal move and could cause deadlock. - - if (my_group_id == msg->intm_grp_id) //then we've visited the intermediate group by definition - msg->is_intm_visited = 1; + msg->num_cll = 0; + msg->num_rngs = 0; - int next_dest_group_id; //The ID of the group that we are aiming for next - either intermediate group or fdest group - if (msg->is_intm_visited == 1) // Then we need to route to the fdest group - next_dest_group_id = fdest_group_id; - else // Then we haven't visited the intermediate group yet and need to route there first - next_dest_group_id = msg->intm_grp_id; + bf->c1 = 0; + bf->c2 = 0; + bf->c3 = 0; + int vcg = 0; + + int num_qos_levels = s->params->num_qos_levels; - // Do I have a direct connection to the next_dest group? 
- vector< Connection > conns_to_next_group = s->connMan->get_connections_to_group(next_dest_group_id); - if (conns_to_next_group.size() > 0) { //Then yes I do - msg->num_rngs++; - int rand_sel = tw_rand_integer(lp->rng, 0, conns_to_next_group.size()-1); - Connection next_conn = conns_to_next_group[rand_sel]; - return next_conn; - } - else { // I need to route to a router in my group that does have a direct connection to the intermediate group - vector connecting_router_ids = connectionList[my_group_id][next_dest_group_id]; - assert(connecting_router_ids.size() > 0); - msg->num_rngs++; - int rand_sel = tw_rand_integer(lp->rng, 0, connecting_router_ids.size()-1); - int conn_router_id = connecting_router_ids[rand_sel]; + if(num_qos_levels > 1) + vcg = get_vcg_from_category(msg); - //There may be parallel connections to the same router - randomly select from them - vector< Connection > conns_to_next_router = s->connMan->get_connections_to_gid(conn_router_id, CONN_LOCAL); - assert(conns_to_next_router.size() > 0); - msg->num_rngs++; - rand_sel = tw_rand_integer(lp->rng, 0, conns_to_next_router.size()-1); - Connection next_conn = conns_to_next_router[rand_sel]; - return next_conn; + msg->num_cll++; + tw_stime ts = codes_local_latency(lp); + s->vc_occupancy[vcg] -= s->params->chunk_size; + + if(s->in_send_loop == 0 && s->terminal_msgs[vcg] != NULL) { + terminal_dally_message *m; + bf->c1 = 1; + tw_event* e = model_net_method_event_new(lp->gid, ts, lp, DRAGONFLY_DALLY, + (void**)&m, NULL); + m->type = T_SEND; + m->magic = terminal_magic_num; + s->in_send_loop = 1; + tw_event_send(e); } + return; } -// This is not the most efficient way to do things as k approaches the size(conns). -// For low k it's more efficient than doing a full shuffle to sample a few random indices, though. 
-static vector< Connection > dfdally_poll_k_connections(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp, vector< Connection > conns, int k) +void +dragonfly_dally_terminal_final( terminal_state * s, + tw_lp * lp ) { - vector< Connection > k_conns; - if (conns.size() == 0) + // printf("terminal id %d\n",s->terminal_id); + dragonfly_total_time += s->total_time; //increment the PE level time counter + + if (s->max_latency > dragonfly_max_latency) + dragonfly_max_latency = s->max_latency; //get maximum latency across all LPs on this PE + + + model_net_print_stats(lp->gid, s->dragonfly_stats_array); + int written = 0; + + if(s->terminal_id == 0) { - return k_conns; + written += sprintf(s->output_buf + written, "# Format < dest_type> \n"); +// fprintf(fp, "# Format <# Flits/Packets finished> \n"); } + written += sprintf(s->output_buf + written, "%u %s %u %s %s %llu %lf %lu\n", + s->terminal_id, "T", s->router_id, "R", "CN", LLU(s->total_msg_size), s->busy_time, s->stalled_chunks); - if (conns.size() == 1) + lp_io_write(lp->gid, (char*)"dragonfly-link-stats", written, s->output_buf); + + // if(s->terminal_id == 0) + // { + // //fclose(dragonfly_term_bw_log); + // char meta_filename[128]; + // sprintf(meta_filename, "dragonfly-cn-stats.meta"); + + // FILE * fp = NULL; + // fp = fopen(meta_filename, "w"); + // if(fp) + // fprintf(fp, "# Format <# Flits/Packets finished> \n"); + // fclose(fp); + // } + + written = 0; + if(s->terminal_id == 0) { - k_conns.push_back(conns[0]); - return k_conns; + written += sprintf(s->output_buf2 + written, "# Format <# Packets finished> \n"); } + written += sprintf(s->output_buf2 + written, "%llu %u %d %llu %lf %lf %lf %ld %lf %lf\n", + LLU(lp->gid), s->terminal_id, s->total_gen_size, LLU(s->total_msg_size), s->total_time/s->finished_chunks, s->max_latency, s->min_latency, + s->finished_packets, (double)s->total_hops/s->finished_chunks, s->busy_time); - if (k == 2) { //This is the default and so let's make a cheaper 
optimization for it - msg->num_rngs += 2; + if(s->terminal_msgs[0] != NULL) + printf("[%llu] leftover terminal messages \n", LLU(lp->gid)); + lp_io_write(lp->gid, (char*)"dragonfly-cn-stats", written, s->output_buf2); - int rand_sel_1, rand_sel_2, rand_sel_2_offset; - rand_sel_1 = tw_rand_integer(lp->rng, 0, conns.size()-1); - rand_sel_2_offset = tw_rand_integer(lp->rng, 1, conns.size()-1); - rand_sel_2 = (rand_sel_1 + rand_sel_2_offset) % conns.size(); - k_conns.push_back(conns[rand_sel_1]); - k_conns.push_back(conns[rand_sel_2]); + //if(s->packet_gen != s->packet_fin) + // printf("\n generated %d finished %d ", s->packet_gen, s->packet_fin); + + if(s->rank_tbl) + qhash_finalize(s->rank_tbl); + + rc_stack_destroy(s->st); + free(s->vc_occupancy); + free(s->terminal_msgs); + free(s->terminal_msgs_tail); +} - return k_conns; +void dragonfly_dally_router_final(router_state * s, tw_lp * lp) +{ + free(s->global_channel); + int i, j; + for(i = 0; i < s->params->radix; i++) { + for(j = 0; j < s->params->num_vcs; j++) { + if(s->queued_msgs[i][j] != NULL) { + printf("[%llu] leftover queued messages %d %d %d\n", LLU(lp->gid), i, j, + s->vc_occupancy[i][j]); + } + if(s->pending_msgs[i][j] != NULL) { + printf("[%llu] lefover pending messages %d %d\n", LLU(lp->gid), i, j); + } + } } - // if (k > conns.size()) - // tw_error(TW_LOC, "Attempted to poll k random connections but k (%d) is greater than number of connections (%d)",k,conns.size()); - // create set of unique random k indicies - int last_sel = 0; - set< int > rand_sels; - for (int i = 0; i < k; i++) + if(s->router_id == 0) + fclose(dragonfly_rtr_bw_log); + + rc_stack_destroy(s->st); + + const dragonfly_param *p = s->params; + int written = 0; + int src_rel_id = s->router_id % p->num_routers; + int local_grp_id = s->router_id / p->num_routers; + for(int d = 0; d <= p->intra_grp_radix; d++) { - int rand_int = tw_rand_integer(lp->rng, 0, (conns.size() - 1) - rand_sels.size()); - int attempt_offset = (last_sel + rand_int) % 
conns.size(); //get a hopefully unused index - this method of sampling without replacement results in only about - while (rand_sels.count(attempt_offset) != 0) //increment till we find an unused index + if(d != src_rel_id) { - attempt_offset = (attempt_offset + 1) % conns.size(); + int dest_ab_id = local_grp_id * p->num_routers + d; + written += sprintf(s->output_buf + written, "\n%d %s %d %s %s %llu %lf %lu", + s->router_id, + "R", + dest_ab_id, + "R", + "L", + LLU(s->link_traffic[d]), + s->busy_time[d], + s->stalled_chunks[d]); } - rand_sels.insert(attempt_offset); - last_sel = attempt_offset; - } - msg->num_rngs += k; // we only used the rng k times - - // use random k set to create vector of k connections - for (int index : rand_sels) - { - k_conns.push_back(conns[index]); } - return k_conns; -} - -// note that this is somewhat expensive the larger k is in comparison to the total possible -// consider an optimization to implement an efficient shuffle to poll k random sampling instead -// consider an optimization for the default of 2 -static Connection dfdally_get_best_from_k_connections(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp, vector< Connection > conns, int k) -{ - vector< Connection > k_conns = dfdally_poll_k_connections(s, bf, msg, lp, conns, k); - return get_absolute_best_connection_from_conns(s, bf, msg, lp, k_conns); -} - -//Uses PAR algorithm -static Connection dfdally_prog_adaptive_routing(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp, int fdest_router_id) -{ - int my_router_id = s->router_id; - int my_group_id = s->group_id; - int fdest_group_id = fdest_router_id / s->params->num_routers; - int origin_group_id = msg->origin_router_id / s->params->num_routers; - int adaptive_threshold = s->params->adaptive_threshold; - - // The check for detination group local routing has already been completed - we can assume we're not in the destination group - // are we in the intermediate group? 
- if (my_group_id == msg->intm_grp_id) - msg->is_intm_visited = 1; - - Connection nextStopConn; - vector< Connection > poss_min_next_stops = get_legal_minimal_stops(s, bf, msg, lp, fdest_router_id); - vector< Connection > poss_nonmin_next_stops = get_legal_nonminimal_stops(s, bf, msg, lp, fdest_router_id); - - Connection best_min_conn, best_nonmin_conn; - ConnectionType conn_type_of_mins, conn_type_of_nonmins; + vector< Connection > my_global_links = s->connMan->get_connections_by_type(CONN_GLOBAL); + vector< Connection >::iterator it = my_global_links.begin(); - if (poss_min_next_stops.size() > 0) - { - conn_type_of_mins = poss_min_next_stops[0].conn_type; // All of these in this vector should be the same... - } - if (poss_nonmin_next_stops.size() > 0) + for(; it != my_global_links.end(); it++) { - conn_type_of_nonmins = poss_nonmin_next_stops[0].conn_type; + int dest_rtr_id = it->dest_gid; + int port_no = it->port; + assert(port_no >= 0 && port_no < p->radix); + written += sprintf(s->output_buf + written, "\n%d %s %d %s %s %llu %lf %lu", + s->router_id, + "R", + dest_rtr_id, + "R", + "G", + LLU(s->link_traffic[port_no]), + s->busy_time[port_no], + s->stalled_chunks[port_no]); } - if (conn_type_of_mins == CONN_GLOBAL) - best_min_conn = dfdally_get_best_from_k_connections(s, bf, msg, lp, poss_min_next_stops, s->params->global_k_picks); - else - best_min_conn = get_absolute_best_connection_from_conns(s, bf, msg, lp, poss_min_next_stops); //could use from_k_connections function but that's very expensive when k == size of input connections - - if (conn_type_of_nonmins == CONN_GLOBAL) - best_nonmin_conn = dfdally_get_best_from_k_connections(s, bf, msg, lp, poss_nonmin_next_stops, s->params->global_k_picks); - else - best_nonmin_conn = get_absolute_best_connection_from_conns(s, bf, msg, lp, poss_nonmin_next_stops); - - int min_score = dfdally_score_connection(s, bf, msg, lp, best_min_conn, C_MIN); - int nonmin_score = dfdally_score_connection(s, bf, msg, lp, 
best_nonmin_conn, C_NONMIN); + sprintf(s->output_buf + written, "\n"); + lp_io_write(lp->gid, (char*)"dragonfly-link-stats", written, s->output_buf); - if ((msg->path_type == NON_MINIMAL) && (msg->is_intm_visited != 1)) { //if we're nonminimal and haven't reached the intermediate group yet - //must pick non-minimal (if we have visited, we can pick minimal then as nonminimal will be an empty vector) - return best_nonmin_conn; + /*if(!s->router_id) + { + written = sprintf(s->output_buf, "# Format "); + written += sprintf(s->output_buf + written, "# Router ports in the order: %d green links, %d black links %d global channels \n", + p->num_router_cols * p->num_row_chans, p->num_router_rows * p->num_col_chans, p->num_global_channels); } + written += sprintf(s->output_buf2 + written, "\n %llu %d %d", + LLU(lp->gid), + s->router_id / p->num_routers, + s->router_id % p->num_routers); - if (min_score <= adaptive_threshold) - return best_min_conn; - else if (min_score <= nonmin_score) - return best_min_conn; - else { - msg->path_type = NON_MINIMAL; - return best_nonmin_conn; - } - + for(int d = 0; d < p->radix; d++) + written += sprintf(s->output_buf2 + written, " %lld", LLD(s->link_traffic[d])); + lp_io_write(lp->gid, (char*)"dragonfly-router-traffic", written, s->output_buf2); + */ + // if (!g_tw_mynode) { + // if (s->router_id == 0) { + // if (PRINT_CONFIG) + // dragonfly_print_params(s->params); + // } + // } } static Connection do_dfdally_routing(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp, int fdest_router_id) @@ -3768,14 +3623,71 @@ static void router_verify_valid_receipt(router_state *s, tw_bf *bf, terminal_dal } +/*When a packet is sent from the current router and a buffer slot becomes available, a credit is sent back to schedule another packet event*/ +static void router_credit_send(router_state * s, terminal_dally_message * msg, + tw_lp * lp, int sq, short* rng_counter) { + tw_event * buf_e; + tw_stime ts; + terminal_dally_message * buf_msg; 
+ + int dest = 0, type = R_BUFFER; + int is_terminal = 0; + double credit_delay; + + const dragonfly_param *p = s->params; + + // Notify sender terminal about available buffer space + if(msg->last_hop == TERMINAL) { + dest = msg->src_terminal_id; + type = T_BUFFER; + is_terminal = 1; + credit_delay = p->cn_credit_delay; + } + else if(msg->last_hop == GLOBAL) { + dest = msg->intm_lp_id; + credit_delay = p->global_credit_delay; + } + else if(msg->last_hop == LOCAL) { + dest = msg->intm_lp_id; + credit_delay = p->local_credit_delay; + } + else + printf("\n Invalid message type"); + + (*rng_counter)++; + ts = g_tw_lookahead + credit_delay + tw_rand_unif(lp->rng); + + if (is_terminal) { + buf_e = model_net_method_event_new(dest, ts, lp, DRAGONFLY_DALLY, + (void**)&buf_msg, NULL); + buf_msg->magic = terminal_magic_num; + } + else { + buf_e = model_net_method_event_new(dest, ts, lp, DRAGONFLY_DALLY_ROUTER, + (void**)&buf_msg, NULL); + buf_msg->magic = router_magic_num; + } + + buf_msg->origin_router_id = s->router_id; + if(sq == -1) { + buf_msg->vc_index = msg->vc_index; + buf_msg->output_chan = msg->output_chan; + } else { + buf_msg->vc_index = msg->saved_vc; + buf_msg->output_chan = msg->saved_channel; + } + strcpy(buf_msg->category, msg->category); + buf_msg->type = type; + + tw_event_send(buf_e); + return; +} + static void router_packet_receive_rc(router_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) -{ - router_rev_ecount++; - router_ecount--; - +{ int output_port = msg->saved_vc; int output_chan = msg->saved_channel; @@ -3800,7 +3712,7 @@ static void router_packet_receive_rc(router_state * s, s->stalled_chunks[output_port]--; if(bf->c22) { - s->last_buf_full[output_port] = msg->saved_busy_time; + s->last_buf_full[output_port] = msg->saved_busy_time; } delete_terminal_dally_message_list(return_tail(s->queued_msgs[output_port], s->queued_msgs_tail[output_port], output_chan)); @@ -3819,8 +3731,6 @@ static void router_packet_receive( router_state * s, 
router_verify_valid_receipt(s, bf, msg, lp); - router_ecount++; - tw_stime ts; int num_qos_levels = s->params->num_qos_levels; @@ -3850,13 +3760,8 @@ static void router_packet_receive( router_state * s, int num_groups = s->params->num_groups; int total_routers = s->params->total_routers; - int next_stop = -1, output_port = -1, output_chan = -1, adap_chan = -1; + int next_stop = -1, output_port = -1, output_chan = -1; int dest_router_id = codes_mapping_get_lp_relative_id(msg->dest_terminal_lpid, 0, 0) / s->params->num_cn; - int local_grp_id = s->router_id / num_routers; - int src_grp_id = msg->origin_router_id / num_routers; - int dest_grp_id = dest_router_id / num_routers; - int intm_router_id, intm_router_id_b; - short prev_path_type = 0, next_path_type = 0; terminal_dally_message_list * cur_chunk = (terminal_dally_message_list*)calloc(1, sizeof(terminal_dally_message_list)); init_terminal_dally_message_list(cur_chunk, msg); @@ -3993,8 +3898,6 @@ static void router_packet_receive( router_state * s, static void router_packet_send_rc(router_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) { - router_ecount--; - router_rev_ecount++; int num_qos_levels = s->params->num_qos_levels; int output_port = msg->saved_vc; @@ -4026,6 +3929,7 @@ static void router_packet_send_rc(router_state * s, tw_bf * bf, terminal_dally_m { s->busy_time[output_port] = msg->saved_rcv_time; s->busy_time_sample[output_port] = msg->saved_sample_time; + s->ross_rsample.busy_time[output_port] = msg->saved_sample_time; s->last_buf_full[output_port] = msg->saved_busy_time; } @@ -4063,14 +3967,11 @@ static void router_packet_send_rc(router_state * s, tw_bf * bf, terminal_dally_m if(bf->c4) { s->in_send_loop[output_port] = 1; - return; } } /* routes the current packet to the next stop */ static void router_packet_send( router_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) { - router_ecount++; - tw_stime ts; tw_event *e; terminal_dally_message *m; @@ -4116,6 +4017,7 @@ 
static void router_packet_send( router_state * s, tw_bf * bf, terminal_dally_mes msg->saved_sample_time = s->busy_time_sample[output_port]; s->busy_time[output_port] += (tw_now(lp) - s->last_buf_full[output_port]); s->busy_time_sample[output_port] += (tw_now(lp) - s->last_buf_full[output_port]); + s->ross_rsample.busy_time[output_port] += (tw_now(lp) - s->last_buf_full[output_port]); s->last_buf_full[output_port] = 0.0; } @@ -4147,7 +4049,7 @@ static void router_packet_send( router_state * s, tw_bf * bf, terminal_dally_mes double bytetime = delay; if(cur_entry->msg.packet_size == 0) - bytetime = bytes_to_ns(CREDIT_SIZE, bandwidth); + bytetime = bytes_to_ns(s->params->credit_size, bandwidth); if((cur_entry->msg.packet_size < s->params->chunk_size) && (cur_entry->msg.chunk_id == num_chunks - 1)) bytetime = bytes_to_ns(cur_entry->msg.packet_size % s->params->chunk_size, bandwidth); @@ -4191,17 +4093,12 @@ static void router_packet_send( router_state * s, tw_bf * bf, terminal_dally_mes m->magic = router_magic_num; int msg_size = s->params->chunk_size; - if((cur_entry->msg.packet_size % s->params->chunk_size) && (cur_entry->msg.chunk_id == num_chunks - 1)) { bf->c11 = 1; - s->link_traffic[output_port] += (cur_entry->msg.packet_size % - s->params->chunk_size); - s->link_traffic_sample[output_port] += (cur_entry->msg.packet_size % - s->params->chunk_size); - s->ross_rsample.link_traffic_sample[output_port] += (cur_entry->msg.packet_size % - s->params->chunk_size); - s->link_traffic_ross_sample[output_port] += (cur_entry->msg.packet_size % - s->params->chunk_size); + s->link_traffic[output_port] += (cur_entry->msg.packet_size % s->params->chunk_size); + s->link_traffic_sample[output_port] += (cur_entry->msg.packet_size % s->params->chunk_size); + s->ross_rsample.link_traffic_sample[output_port] += (cur_entry->msg.packet_size % s->params->chunk_size); + s->link_traffic_ross_sample[output_port] += (cur_entry->msg.packet_size % s->params->chunk_size); msg_size = 
cur_entry->msg.packet_size % s->params->chunk_size; } else { @@ -4353,6 +4250,46 @@ static void router_buf_update(router_state * s, tw_bf * bf, terminal_dally_messa return; } +void +terminal_dally_event( terminal_state * s, + tw_bf * bf, + terminal_dally_message * msg, + tw_lp * lp ) +{ + s->fwd_events++; + s->ross_sample.fwd_events++; + //*(int *)bf = (int)0; + assert(msg->magic == terminal_magic_num); + + rc_stack_gc(lp, s->st); + switch(msg->type) + { + case T_GENERATE: + packet_generate(s,bf,msg,lp); + break; + + case T_ARRIVE: + packet_arrive(s,bf,msg,lp); + break; + + case T_SEND: + packet_send(s,bf,msg,lp); + break; + + case T_BUFFER: + terminal_buf_update(s, bf, msg, lp); + break; + + case T_BANDWIDTH: + issue_bw_monitor_event(s, bf, msg, lp); + break; + + default: + printf("\n LP %d Terminal message type not supported %d ", (int)lp->gid, msg->type); + tw_error(TW_LOC, "Msg type not supported"); + } +} + void router_dally_event(router_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) { @@ -4392,6 +4329,38 @@ void router_dally_event(router_state * s, tw_bf * bf, terminal_dally_message * m } } +/* Reverse computation handler for a terminal event */ +void terminal_dally_rc_event_handler(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) +{ + s->rev_events++; + s->ross_sample.rev_events++; + switch(msg->type) + { + case T_GENERATE: + packet_generate_rc(s, bf, msg, lp); + break; + + case T_SEND: + packet_send_rc(s, bf, msg, lp); + break; + + case T_ARRIVE: + packet_arrive_rc(s, bf, msg, lp); + break; + + case T_BUFFER: + terminal_buf_update_rc(s, bf, msg, lp); + break; + + case T_BANDWIDTH: + issue_bw_monitor_event_rc(s,bf, msg, lp); + break; + + default: + tw_error(TW_LOC, "\n Invalid terminal event type %d ", msg->type); + } +} + /* Reverse computation handler for a router event */ void router_dally_rc_event_handler(router_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) @@ -4417,226 +4386,333 @@ void 
router_dally_rc_event_handler(router_state * s, tw_bf * bf, } } -void -terminal_dally_event( terminal_state * s, - tw_bf * bf, - terminal_dally_message * msg, - tw_lp * lp ) +/* dragonfly compute node and router LP types */ +extern "C" { +tw_lptype dragonfly_dally_lps[] = { - s->fwd_events++; - s->ross_sample.fwd_events++; - //*(int *)bf = (int)0; - assert(msg->magic == terminal_magic_num); + // Terminal handling functions + { + (init_f)terminal_dally_init, + (pre_run_f) NULL, + (event_f) terminal_dally_event, + (revent_f) terminal_dally_rc_event_handler, + (commit_f) terminal_dally_commit, + (final_f) dragonfly_dally_terminal_final, + (map_f) codes_mapping, + sizeof(terminal_state) + }, + { + (init_f) router_dally_init, + (pre_run_f) NULL, + (event_f) router_dally_event, + (revent_f) router_dally_rc_event_handler, + (commit_f) router_dally_commit, + (final_f) dragonfly_dally_router_final, + (map_f) codes_mapping, + sizeof(router_state), + }, + {NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0}, +}; +} - rc_stack_gc(lp, s->st); - switch(msg->type) - { - case T_GENERATE: - packet_generate(s,bf,msg,lp); - break; - - case T_ARRIVE: - packet_arrive(s,bf,msg,lp); - break; - - case T_SEND: - packet_send(s,bf,msg,lp); - break; - - case T_BUFFER: - terminal_buf_update(s, bf, msg, lp); - break; - - case T_BANDWIDTH: - issue_bw_monitor_event(s, bf, msg, lp); - break; - - default: - printf("\n LP %d Terminal message type not supported %d ", (int)lp->gid, msg->type); - tw_error(TW_LOC, "Msg type not supported"); - } +/* returns the dragonfly lp type for lp registration */ +static const tw_lptype* dragonfly_dally_get_cn_lp_type(void) +{ + return(&dragonfly_dally_lps[0]); +} +static const tw_lptype* router_dally_get_lp_type(void) +{ + return (&dragonfly_dally_lps[1]); +} + +static void dragonfly_dally_register(tw_lptype *base_type) { + lp_type_register(LP_CONFIG_NM_TERM, base_type); } +static void router_dally_register(tw_lptype *base_type) { + lp_type_register(LP_CONFIG_NM_ROUT, 
base_type); +} -/* Reverse computation handler for a terminal event */ -void terminal_dally_rc_event_handler(terminal_state * s, tw_bf * bf, terminal_dally_message * msg, tw_lp * lp) +/* Routing Functions */ +static void dfdally_select_intermediate_group(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp, int fdest_router_id) { - s->rev_events++; - s->ross_sample.rev_events++; - switch(msg->type) - { - case T_GENERATE: - packet_generate_rc(s, bf, msg, lp); - break; + int fdest_group_id = fdest_router_id / s->params->num_routers; + int origin_group_id = msg->origin_router_id / s->params->num_routers; - case T_SEND: - packet_send_rc(s, bf, msg, lp); - break; + // Has an intermediate group been chosen yet? (Should happen at first router) + if (msg->intm_grp_id == -1) { // Intermediate group hasn't been chosen yet, choose one randomly and route toward it + assert(s->router_id == msg->origin_router_id); + msg->num_rngs++; + int rand_group_id; + if (NONMIN_INCLUDE_SOURCE_DEST) //then any group is a valid intermediate group + rand_group_id = tw_rand_integer(lp->rng, 0, s->params->num_groups-1); + else { //then we don't consider source or dest groups as valid intermediate groups + vector group_list; + for (int i = 0; i < s->params->num_groups; i++) + { + if ((i != origin_group_id) && (i != fdest_group_id)) { + group_list.push_back(i); + } + } + int rand_sel = tw_rand_integer(lp->rng, 0, group_list.size()-1); + rand_group_id = group_list[rand_sel]; + } + msg->intm_grp_id = rand_group_id; + } + else { //the only time that it is re-set is when a router didn't have a direct connection to the intermediate group but had no other options + // so we need to pick an intm group that the current router DOES have a connection to. 
+ assert(s->router_id != msg->origin_router_id); - case T_ARRIVE: - packet_arrive_rc(s, bf, msg, lp); - break; + set< int > valid_intm_groups; + vector< Connection > global_conns = s->connMan->get_connections_by_type(CONN_GLOBAL); + for (vector::iterator it = global_conns.begin(); it != global_conns.end(); it ++) { + Connection conn = *it; + if (NONMIN_INCLUDE_SOURCE_DEST) //then any group I connect to is valid + { + valid_intm_groups.insert(conn.dest_group_id); + } + else + { + if ((conn.dest_group_id != fdest_group_id) && (conn.dest_group_id != origin_group_id)) + valid_intm_groups.insert(conn.dest_group_id); + } + } - case T_BUFFER: - terminal_buf_update_rc(s, bf, msg, lp); - break; + int rand_sel = tw_rand_integer(lp->rng, 0, valid_intm_groups.size()-1); + msg->num_rngs++; + set< int >::iterator it = valid_intm_groups.begin(); + advance(it, rand_sel); //you can't just use [] to access a set + msg->intm_grp_id = *it; + } +} - case T_BANDWIDTH: - issue_bw_monitor_event_rc(s,bf, msg, lp); - break; +//when using this function, you should assume that the self router is NOT the destination. That should be handled elsewhere. 
+static vector< Connection > get_legal_minimal_stops(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp, int fdest_router_id) +{ + int my_router_id = s->router_id; + int my_group_id = s->group_id; + int origin_group_id = msg->origin_router_id / s->params->num_routers; + int fdest_group_id = fdest_router_id / s->params->num_routers; - default: - tw_error(TW_LOC, "\n Invalid terminal event type %d ", msg->type); + if (my_group_id != fdest_group_id) { //we're in origin group or intermediate group - either way we need to route to fdest group minimally + vector< Connection > conns_to_dest_group = s->connMan->get_connections_to_group(fdest_group_id); + if (conns_to_dest_group.size() > 0) { //then we have a direct connection to dest group + return conns_to_dest_group; // --------- return direct connection + } + else { //we don't have a direct connection to group and need list of routers in our group that do + vector< Connection > poss_next_conns_to_group; + set< int > poss_router_id_set_to_group; //TODO this might be a source of non-determinism(?) 
+ for(int i = 0; i < connectionList[my_group_id][fdest_group_id].size(); i++) + { + int poss_router_id = connectionList[my_group_id][fdest_group_id][i]; + if (poss_router_id_set_to_group.count(poss_router_id) == 0) { //we only want to consider a single router id once (we look at all connections to it using the conn man) + vector< Connection > conns = s->connMan->get_connections_to_gid(poss_router_id, CONN_LOCAL); + poss_router_id_set_to_group.insert(poss_router_id); + poss_next_conns_to_group.insert(poss_next_conns_to_group.end(), conns.begin(), conns.end()); + } + } + return poss_next_conns_to_group; // --------- return non-direct connection (still minimal though) + } + } + else { //then we're in the final destination group, also we assume that we're not the fdest router + assert(my_group_id == fdest_group_id); + assert(my_router_id != fdest_router_id); //this should be handled outside of this function + + vector< Connection > conns_to_fdest_router = s->connMan->get_connections_to_gid(fdest_router_id, CONN_LOCAL); + return conns_to_fdest_router; + } +} + +//Note that this is different than Dragonfly Plus's implementation, this isn't the converse of minimal, these are any +//connections that could lead to the intermediate group or a new one if necessary +static vector< Connection > get_legal_nonminimal_stops(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp, int fdest_router_id) +{ + int my_router_id = s->router_id; + int my_group_id = s->group_id; + int origin_group_id = msg->origin_router_id / s->params->num_routers; + int fdest_group_id = fdest_router_id / s->params->num_routers; + bool in_intermediate_group = (my_group_id != origin_group_id) && (my_group_id != fdest_group_id); + int preset_intm_group_id = msg->intm_grp_id; + + if (my_group_id == origin_group_id) { + vector< Connection > conns_to_intm_group = s->connMan->get_connections_to_group(preset_intm_group_id); + + //are we the originating router + if (my_router_id == 
msg->origin_router_id) { //then we are able to route within our own group if necessary + // Do we have direct connection to intermediate group? + if (conns_to_intm_group.size() > 0) { //yes + return conns_to_intm_group; + } + else { //no - route within group to router that DOES have a connection to intm group + vector connecting_router_ids = connectionList[my_group_id][preset_intm_group_id]; + vector< Connection > conns_to_connecting_routers; + for (int i = 0; i < connecting_router_ids.size(); i++) + { + int poss_router_id = connecting_router_ids[i]; + vector< Connection > candidate_conns = s->connMan->get_connections_to_gid(poss_router_id, CONN_LOCAL); + conns_to_connecting_routers.insert(conns_to_connecting_routers.end(), candidate_conns.begin(), candidate_conns.end()); + } + return conns_to_connecting_routers; + } + } + else { //then we can't afford to reroute within our group, we must route to the int group if possible - pick a new one if not + if (conns_to_intm_group.size() > 0) { + return conns_to_intm_group; //route there directly + } + else { //pick a new one! + dfdally_select_intermediate_group(s, bf, msg, lp, fdest_router_id); + conns_to_intm_group = s->connMan->get_connections_to_group(msg->intm_grp_id); //new intm group id + return conns_to_intm_group; + } + } } -} - -/* dragonfly compute node and router LP types */ -extern "C" { -tw_lptype dragonfly_dally_lps[] = -{ - // Terminal handling functions + else if (in_intermediate_group) { + //if we're in the intermediate group then we're just going to default to routing minimally, return an empty vector. 
+ vector< Connection > empty; + return empty; + } + else if (my_group_id == fdest_group_id) { - (init_f)terminal_dally_init, - (pre_run_f) NULL, - (event_f) terminal_dally_event, - (revent_f) terminal_dally_rc_event_handler, - (commit_f) terminal_dally_commit, - (final_f) dragonfly_dally_terminal_final, - (map_f) codes_mapping, - sizeof(terminal_state) - }, + //same as intermediate, force minimal choices + vector< Connection > empty; + return empty; + } + else { - (init_f) router_dally_setup, - (pre_run_f) NULL, - (event_f) router_dally_event, - (revent_f) router_dally_rc_event_handler, - (commit_f) router_dally_commit, - (final_f) dragonfly_dally_router_final, - (map_f) codes_mapping, - sizeof(router_state), - }, - {NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0}, -}; + tw_error(TW_LOC, "Invalid group somehow: not origin, not intermediate, and not fdest group\n"); + vector< Connection > empty; + return empty; + } } -/* For ROSS event tracing */ -void custom_dally_dragonfly_event_collect(terminal_dally_message *m, tw_lp *lp, char *buffer, int *collect_flag) +static Connection dfdally_minimal_routing(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp, int fdest_router_id) { - (void)lp; - (void)collect_flag; + vector< Connection > poss_next_stops = get_legal_minimal_stops(s, bf, msg, lp, fdest_router_id); + if (poss_next_stops.size() < 1) + tw_error(TW_LOC, "MINIMAL DEAD END\n"); - int type = (int) m->type; - memcpy(buffer, &type, sizeof(type)); + ConnectionType conn_type = poss_next_stops[0].conn_type; //TODO this assumes that all possible next stops are of same type - OK for now, but remember this + if (conn_type == CONN_GLOBAL) { //TOOD should we really only randomize global and not local? should we really do light adaptive for nonglobal? 
+ msg->num_rngs++; + int rand_sel = tw_rand_integer(lp->rng, 0, poss_next_stops.size() - 1); + return poss_next_stops[rand_sel]; + } + else + { + Connection best_min_conn = get_absolute_best_connection_from_conns(s, bf, msg, lp, poss_next_stops); + return best_min_conn; + } } -void custom_dally_dragonfly_model_stat_collect(terminal_state *s, tw_lp *lp, char *buffer) +// Coloquially: "Valiant Group Routing" +// This follows the randomized indirect routing algorithm detailed in "Cost-Efficient Dragonfly topology for Large-Scale Systems" and +// "Technology-Driven, Highly-Scalable Dragonfly Topology" by Kim, Dally, Scott, and Abts +// They sourced it from "A scheme for fast parallel communication" by L.G. Valiant +// It differs from true valiant routing in that it randomly selects a GROUP and routes to it - not a random intermediate router +static Connection dfdally_nonminimal_routing(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp, int fdest_router_id) { - (void)lp; - - int index = 0; - tw_lpid id = 0; - long tmp = 0; - tw_stime tmp2 = 0; - - id = s->terminal_id; - memcpy(&buffer[index], &id, sizeof(id)); - index += sizeof(id); - - tmp = s->fin_chunks_ross_sample; - memcpy(&buffer[index], &tmp, sizeof(tmp)); - index += sizeof(tmp); - s->fin_chunks_ross_sample = 0; + int my_router_id = s->router_id; + int my_group_id = s->group_id; + int fdest_group_id = fdest_router_id / s->params->num_routers; + int origin_group_id = msg->origin_router_id / s->params->num_routers; - tmp = s->data_size_ross_sample; - memcpy(&buffer[index], &tmp, sizeof(tmp)); - index += sizeof(tmp); - s->data_size_ross_sample = 0; + assert(msg->intm_grp_id != -1); // This needs to have already been set. + // //The setting of intm_grp_id is kept out of this function so that other routing algorithms can utilize this routing function + // //and set it themselves based on the context. 
This helps avoid an instance like: prog-adaptive routing, second router in path decides + // //to take non-minimal routing but the pre-selected intermediate group ID isn't accessible to it and would require re-routing and + // //thus take extra hops. Possible illegal move and could cause deadlock. - tmp = s->fin_hops_ross_sample; - memcpy(&buffer[index], &tmp, sizeof(tmp)); - index += sizeof(tmp); - s->fin_hops_ross_sample = 0; + if (my_group_id == msg->intm_grp_id) //then we've visited the intermediate group by definition + msg->is_intm_visited = 1; - tmp2 = s->fin_chunks_time_ross_sample; - memcpy(&buffer[index], &tmp2, sizeof(tmp2)); - index += sizeof(tmp2); - s->fin_chunks_time_ross_sample = 0; + int next_dest_group_id; //The ID of the group that we are aiming for next - either intermediate group or fdest group + if (msg->is_intm_visited == 1) // Then we need to route to the fdest group + next_dest_group_id = fdest_group_id; + else // Then we haven't visited the intermediate group yet and need to route there first + next_dest_group_id = msg->intm_grp_id; - tmp2 = s->busy_time_ross_sample; - memcpy(&buffer[index], &tmp2, sizeof(tmp2)); - index += sizeof(tmp2); - s->busy_time_ross_sample = 0; + // Do I have a direct connection to the next_dest group? 
+ vector< Connection > conns_to_next_group = s->connMan->get_connections_to_group(next_dest_group_id); + if (conns_to_next_group.size() > 0) { //Then yes I do + msg->num_rngs++; + int rand_sel = tw_rand_integer(lp->rng, 0, conns_to_next_group.size()-1); + Connection next_conn = conns_to_next_group[rand_sel]; + return next_conn; + } + else { // I need to route to a router in my group that does have a direct connection to the intermediate group + vector connecting_router_ids = connectionList[my_group_id][next_dest_group_id]; + assert(connecting_router_ids.size() > 0); + msg->num_rngs++; + int rand_sel = tw_rand_integer(lp->rng, 0, connecting_router_ids.size()-1); + int conn_router_id = connecting_router_ids[rand_sel]; - return; + //There may be parallel connections to the same router - randomly select from them + vector< Connection > conns_to_next_router = s->connMan->get_connections_to_gid(conn_router_id, CONN_LOCAL); + assert(conns_to_next_router.size() > 0); + msg->num_rngs++; + rand_sel = tw_rand_integer(lp->rng, 0, conns_to_next_router.size()-1); + Connection next_conn = conns_to_next_router[rand_sel]; + return next_conn; + } } -void custom_dally_dfly_router_model_stat_collect(router_state *s, tw_lp *lp, char *buffer) +//Uses PAR algorithm +static Connection dfdally_prog_adaptive_routing(router_state *s, tw_bf *bf, terminal_dally_message *msg, tw_lp *lp, int fdest_router_id) { - (void)lp; + int my_router_id = s->router_id; + int my_group_id = s->group_id; + int fdest_group_id = fdest_router_id / s->params->num_routers; + int origin_group_id = msg->origin_router_id / s->params->num_routers; + int adaptive_threshold = s->params->adaptive_threshold; + + // The check for detination group local routing has already been completed - we can assume we're not in the destination group - const dragonfly_param * p = s->params; - int i, index = 0; + // are we in the intermediate group? 
+ if (my_group_id == msg->intm_grp_id) + msg->is_intm_visited = 1; - tw_lpid id = 0; - tw_stime tmp = 0; - int64_t tmp2 = 0; + Connection nextStopConn; // NOTE(review): never used in this function + vector< Connection > poss_min_next_stops = get_legal_minimal_stops(s, bf, msg, lp, fdest_router_id); + vector< Connection > poss_nonmin_next_stops = get_legal_nonminimal_stops(s, bf, msg, lp, fdest_router_id); - id = s->router_id; - memcpy(&buffer[index], &id, sizeof(id)); - index += sizeof(id); + Connection best_min_conn, best_nonmin_conn; + ConnectionType conn_type_of_mins, conn_type_of_nonmins; // NOTE(review): each is assigned only when its vector is non-empty, yet both are compared against CONN_GLOBAL unconditionally further down - for(i = 0; i < p->radix; i++) + if (poss_min_next_stops.size() > 0) { - tmp = s->busy_time_ross_sample[i]; - memcpy(&buffer[index], &tmp, sizeof(tmp)); - index += sizeof(tmp); - s->busy_time_ross_sample[i] = 0; - - tmp2 = s->link_traffic_ross_sample[i]; - memcpy(&buffer[index], &tmp2, sizeof(tmp2)); - index += sizeof(tmp2); - s->link_traffic_ross_sample[i] = 0; + conn_type_of_mins = poss_min_next_stops[0].conn_type; // All of these in this vector should be the same...
+ } + if (poss_nonmin_next_stops.size() > 0) + { + conn_type_of_nonmins = poss_nonmin_next_stops[0].conn_type; } - return; -} - -static const st_model_types *custom_dally_dragonfly_get_model_types(void) -{ - return(&custom_dally_dragonfly_model_types[0]); -} - -static const st_model_types *custom_dally_dfly_router_get_model_types(void) -{ - return(&custom_dally_dragonfly_model_types[1]); -} - -static void custom_dally_dragonfly_register_model_types(st_model_types *base_type) -{ - st_model_type_register(LP_CONFIG_NM_TERM, base_type); -} -static void custom_dally_router_register_model_types(st_model_types *base_type) -{ - st_model_type_register(LP_CONFIG_NM_ROUT, base_type); -} -/*** END of ROSS event tracing additions */ + if (conn_type_of_mins == CONN_GLOBAL) + best_min_conn = dfdally_get_best_from_k_connections(s, bf, msg, lp, poss_min_next_stops, s->params->global_k_picks); + else + best_min_conn = get_absolute_best_connection_from_conns(s, bf, msg, lp, poss_min_next_stops); //could use from_k_connections function but that's very expensive when k == size of input connections + + if (conn_type_of_nonmins == CONN_GLOBAL) + best_nonmin_conn = dfdally_get_best_from_k_connections(s, bf, msg, lp, poss_nonmin_next_stops, s->params->global_k_picks); + else + best_nonmin_conn = get_absolute_best_connection_from_conns(s, bf, msg, lp, poss_nonmin_next_stops); -/* returns the dragonfly lp type for lp registration */ -static const tw_lptype* dragonfly_dally_get_cn_lp_type(void) -{ - return(&dragonfly_dally_lps[0]); -} -static const tw_lptype* router_dally_get_lp_type(void) -{ - return (&dragonfly_dally_lps[1]); -} + int min_score = dfdally_score_connection(s, bf, msg, lp, best_min_conn, C_MIN); + int nonmin_score = dfdally_score_connection(s, bf, msg, lp, best_nonmin_conn, C_NONMIN); -static void dragonfly_dally_register(tw_lptype *base_type) { - lp_type_register(LP_CONFIG_NM_TERM, base_type); -} + if ((msg->path_type == NON_MINIMAL) && (msg->is_intm_visited != 1)) { //if 
we're nonminimal and haven't reached the intermediate group yet + //must pick non-minimal (if we have visited, we can pick minimal then as nonminimal will be an empty vector) + return best_nonmin_conn; + } -static void router_dally_register(tw_lptype *base_type) { - lp_type_register(LP_CONFIG_NM_ROUT, base_type); + if (min_score <= adaptive_threshold) + return best_min_conn; + else if (min_score <= nonmin_score) + return best_min_conn; + else { + msg->path_type = NON_MINIMAL; + return best_nonmin_conn; + } } /* @@ -4651,7 +4727,9 @@ data structures used by the model. Specify: routing="prog-adaptive-legacy" in th USE AT OWN RISK. One should consider support for this routing algorithm ended. From thorough analysis, I believe that there are bugs and unintended behavior in this code but I do not have proper documentation to say for certain. Thanks, -NM -LAST SHA FOR ORIGINAL CODE: + +Link to last version prior to this port: https://github.com/codes-org/codes/releases/tag/old-dfdally +(See git tag: old-dfdally) */ //This is utilized by prog_adaptive_legacy routing - returns the first channel number from the conneciton list taht features a given router to the specified group @@ -4853,7 +4931,7 @@ static tw_lpid get_next_stop_legacy(router_state *s, tw_lp *lp, tw_bf *bf, termi next_stop % num_routers_per_mgrp, &router_dest_id); if(msg->packet_ID == LLU(TRACK_PKT) && msg->src_terminal_id == T_ID) - printf("\n Next stop is %ld ", next_stop); + printf("\n Next stop is %d ", next_stop); return router_dest_id; } @@ -4911,7 +4989,7 @@ static tw_lpid get_next_stop_legacy(router_state *s, tw_lp *lp, tw_bf *bf, termi } if(msg->packet_ID == LLU(TRACK_PKT) && msg->src_terminal_id == T_ID) - printf("\n Next stop is %ld ", dest_lp); + printf("\n Next stop is %d ", dest_lp); codes_mapping_get_lp_id(lp_group_name, LP_CONFIG_NM_ROUT, s->anno, 0, dest_lp / num_routers_per_mgrp, dest_lp % num_routers_per_mgrp, &router_dest_id); diff --git a/src/networks/model-net/dragonfly-plus.C 
b/src/networks/model-net/dragonfly-plus.C index 8ae5f5d6..ae72321a 100644 --- a/src/networks/model-net/dragonfly-plus.C +++ b/src/networks/model-net/dragonfly-plus.C @@ -35,10 +35,11 @@ #define DUMP_CONNECTIONS 0 #define PRINT_CONFIG 1 #define T_ID 1 -#define CREDIT_SIZE 8 #define DFLY_HASH_TABLE_SIZE 40000 #define SHOW_ADAPTIVE_STATS 1 #define BW_MONITOR 1 +// maximum number of characters (including the terminating NUL) allowed to represent the routing algorithm as a string +#define MAX_ROUTING_CHARS 32 // debugging parameters #define TRACK -1 @@ -53,28 +54,25 @@ #define LP_METHOD_NM_ROUT (model_net_method_names[DRAGONFLY_PLUS_ROUTER]) using namespace std; -struct Link -{ - int offset; -}; -struct bLink -{ - int offset, dest; -}; /*MM: Maintains a list of routers connecting the source and destination groups */ static vector< vector< vector< int > > > connectionList; static vector< ConnectionManager > connManagerList; - -struct IntraGroupLink -{ +/* IntraGroupLink is a struct used to unpack binary data regarding intra group + connections (presumably read from the supplied intra-group file; confirm). This struct should not be + utilized anywhere else in the model. +*/ +struct IntraGroupLink { int src, dest; }; -struct InterGroupLink -{ +/* InterGroupLink is a struct used to unpack binary data regarding inter group + connections from the supplied inter-group file. This struct should not be + utilized anywhere else in the model. +*/ +struct InterGroupLink { int src, dest; }; @@ -100,10 +98,7 @@ static FILE * dragonfly_rtr_bw_log = NULL; #define indexer2d(_ptr, _x, _y, _maxx, _maxy) \ ((_ptr) + _y * (_maxx) + _x) -static double maxd(double a, double b) -{ - return a < b ? b : a; -} +static double maxd(double a, double b) { return a < b ?
b : a; } /* minimal and non-minimal packet counts for adaptive routing*/ static int minimal_count = 0, nonmin_count = 0; @@ -242,7 +237,10 @@ struct dragonfly_plus_param double cn_delay; double local_delay; double global_delay; - double credit_delay; + int credit_size; + double local_credit_delay; + double global_credit_delay; + double cn_credit_delay; double router_delay; }; @@ -286,90 +284,6 @@ struct dfly_qhash_entry struct qhash_head hash_link; }; -/* handles terminal and router events like packet generate/send/receive/buffer */ -typedef struct terminal_state terminal_state; -typedef struct router_state router_state; - -/* dragonfly compute node data structure */ -struct terminal_state -{ - uint64_t packet_counter; - - int packet_gen; - int packet_fin; - - // Dragonfly specific parameters - unsigned int router_id; - unsigned int terminal_id; - - // Each terminal will have an input and output channel with the router - int *vc_occupancy; // NUM_VC - int num_vcs; - tw_stime terminal_available_time; - terminal_plus_message_list **terminal_msgs; - terminal_plus_message_list **terminal_msgs_tail; - int in_send_loop; - struct mn_stats dragonfly_stats_array[CATEGORY_MAX]; - - int * qos_status; - unsigned long long* qos_data; - - int last_qos_lvl; - int is_monitoring_bw; - - struct rc_stack *st; - int issueIdle; - unsigned long long * terminal_length; - - const char *anno; - const dragonfly_plus_param *params; - - struct qhash_table *rank_tbl; - uint64_t rank_tbl_pop; - - tw_stime total_time; - uint64_t total_msg_size; - double total_hops; - long finished_msgs; - long finished_chunks; - long finished_packets; - - tw_stime last_buf_full; - tw_stime busy_time; - - tw_stime max_latency; - tw_stime min_latency; - - char output_buf[4096]; - char output_buf2[4096]; - /* For LP suspend functionality */ - int error_ct; - - /* For sampling */ - long fin_chunks_sample; - long data_size_sample; - double fin_hops_sample; - tw_stime fin_chunks_time; - tw_stime busy_time_sample; - - 
char sample_buf[4096]; - struct dfly_cn_sample *sample_stat; - int op_arr_size; - int max_arr_size; - - /* for logging forward and reverse events */ - long fwd_events; - long rev_events; - - // for ROSS Instrumentation - long fin_chunks_ross_sample; - long data_size_ross_sample; - long fin_hops_ross_sample; - tw_stime fin_chunks_time_ross_sample; - tw_stime busy_time_ross_sample; - struct dfly_cn_sample ross_sample; -}; - /* terminal event type (1-4) */ typedef enum event_t { T_GENERATE = 1, @@ -437,6 +351,47 @@ typedef enum route_scoring_preference_t HIGHER } route_scoring_preference_t; +enum router_type +{ + SPINE = 1, + LEAF +}; + +typedef enum intermediate_router_t +{ + INT_CHOICE_LEAF = 1, + INT_CHOICE_SPINE, + INT_CHOICE_BOTH +} intermediate_router_t; + +static map< int, router_type> router_type_map; +// Returns a newly calloc'd, NUL-terminated name for routing_alg_int; this function never frees it, so the caller must. Unknown values end in tw_error(). +static char* get_routing_alg_chararray(int routing_alg_int) +{ + char* rt_alg = (char*) calloc(MAX_ROUTING_CHARS, sizeof(char)); + switch (routing_alg_int) + { + case MINIMAL: + strcpy(rt_alg, "MINIMAL"); + break; + case NON_MINIMAL_SPINE: + strcpy(rt_alg, "NON_MINIMAL_SPINE"); + break; + case NON_MINIMAL_LEAF: + strcpy(rt_alg, "NON_MINIMAL_LEAF"); + break; + case PROG_ADAPTIVE: + strcpy(rt_alg, "PROG_ADAPTIVE"); + break; + case FULLY_PROG_ADAPTIVE: + strcpy(rt_alg, "FULL_PROG_ADAPTIVE"); break; // break was missing, so this valid case fell through to the tw_error() default + default: + tw_error(TW_LOC, "Routing Algorithm is UNDEFINED - did you call get_routing_alg_string() before setting the static global variable: 'routing'?"); + break; + } + return rt_alg; +} + static bool isRoutingAdaptive(int alg) { if (alg == PROG_ADAPTIVE || alg == FULLY_PROG_ADAPTIVE) @@ -461,27 +416,93 @@ static bool isRoutingNonminimalExplicit(int alg) return false; } -enum LINK_TYPE -{ - GREEN, - BLACK, -}; +/* handles terminal and router events like packet generate/send/receive/buffer */ +typedef struct terminal_state terminal_state; +typedef struct router_state router_state; -enum router_type +/* dragonfly compute node data structure */ +struct terminal_state { -
SPINE = 1, - LEAF -}; + uint64_t packet_counter; -typedef enum intermediate_router_t -{ - INT_CHOICE_LEAF = 1, - INT_CHOICE_SPINE, - INT_CHOICE_BOTH -} intermediate_router_t; + int packet_gen; + int packet_fin; -static map< int, router_type> router_type_map; + int total_gen_size; + + // Dragonfly specific parameters + unsigned int router_id; + unsigned int terminal_id; + + // Each terminal will have an input and output channel with the router + int *vc_occupancy; // NUM_VC + int num_vcs; + tw_stime terminal_available_time; + terminal_plus_message_list **terminal_msgs; + terminal_plus_message_list **terminal_msgs_tail; + int in_send_loop; + struct mn_stats dragonfly_stats_array[CATEGORY_MAX]; + + int * qos_status; + unsigned long long* qos_data; + + int last_qos_lvl; + int is_monitoring_bw; + + struct rc_stack *st; + int issueIdle; + unsigned long long * terminal_length; + + const char *anno; + const dragonfly_plus_param *params; + + struct qhash_table *rank_tbl; + uint64_t rank_tbl_pop; + + tw_stime total_time; + uint64_t total_msg_size; + double total_hops; + long finished_msgs; + long finished_chunks; + long finished_packets; + + tw_stime last_buf_full; + tw_stime busy_time; + + unsigned long stalled_chunks; //Counter for when a packet cannot be immediately routed due to full VC + + tw_stime max_latency; + tw_stime min_latency; + + char output_buf[4096]; + char output_buf2[4096]; + /* For LP suspend functionality */ + int error_ct; + + /* For sampling */ + long fin_chunks_sample; + long data_size_sample; + double fin_hops_sample; + tw_stime fin_chunks_time; + tw_stime busy_time_sample; + + char sample_buf[4096]; + struct dfly_cn_sample *sample_stat; + int op_arr_size; + int max_arr_size; + + /* for logging forward and reverse events */ + long fwd_events; + long rev_events; + // for ROSS Instrumentation + long fin_chunks_ross_sample; + long data_size_ross_sample; + long fin_hops_ross_sample; + tw_stime fin_chunks_time_ross_sample; + tw_stime busy_time_ross_sample; 
+ struct dfly_cn_sample ross_sample; +}; struct router_state { @@ -510,6 +531,8 @@ struct router_state tw_stime *busy_time; tw_stime *busy_time_sample; + unsigned long* stalled_chunks; //Counter for when a packet is put into queued messages instead of routing due to full VC + terminal_plus_message_list ***pending_msgs; terminal_plus_message_list ***pending_msgs_tail; terminal_plus_message_list ***queued_msgs; @@ -559,3118 +582,3142 @@ st_model_types dfly_plus_model_types[] = { {(ev_trace_f) dfly_plus_event_collect, sizeof(int), (model_stat_f) dfly_plus_router_model_stat_collect, - 0, //updated in router_plus_setup() since it's based on the radix + 0, //updated in router_plus_init() since it's based on the radix (sample_event_f) ross_dfly_plus_rsample_fn, (sample_revent_f) ross_dfly_plus_rsample_rc_fn, - 0 } , //updated in router_plus_setup() since it's based on the radix + 0 } , //updated in router_plus_init() since it's based on the radix {NULL, 0, NULL, 0, NULL, NULL, 0} }; /* End of ROSS model instrumentation */ -int dragonfly_plus_get_assigned_router_id(int terminal_id, const dragonfly_plus_param *p); - -static short routing = MINIMAL; -static short scoring = ALPHA; -static short scoring_preference = LOWER; - -static tw_stime dragonfly_total_time = 0; -static tw_stime dragonfly_max_latency = 0; - +// event tracing callback, used by router and terminal LPs: copies the message type into buffer as an int +void dfly_plus_event_collect(terminal_plus_message *m, tw_lp *lp, char *buffer, int *collect_flag) +{ + (void)lp; + (void)collect_flag; -static long long total_hops = 0; -static long long N_finished_packets = 0; -static long long total_msg_sz = 0; -static long long N_finished_msgs = 0; -static long long N_finished_chunks = 0; + int type = (int) m->type; + memcpy(buffer, &type, sizeof(type)); +} -static int dragonfly_rank_hash_compare(void *key, struct qhash_head *link) +// GVT-based and real time sampling callback for terminals +void dfly_plus_model_stat_collect(terminal_state *s, tw_lp *lp, char *buffer) { -
struct dfly_hash_key *message_key = (struct dfly_hash_key *) key; - struct dfly_qhash_entry *tmp = NULL; + (void)lp; - tmp = qhash_entry(link, struct dfly_qhash_entry, hash_link); + int index = 0; + tw_lpid id = 0; + long tmp = 0; + tw_stime tmp2 = 0; + + id = s->terminal_id; + memcpy(&buffer[index], &id, sizeof(id)); + index += sizeof(id); - if (tmp->key.message_id == message_key->message_id && tmp->key.sender_id == message_key->sender_id) - return 1; + tmp = s->fin_chunks_ross_sample; + memcpy(&buffer[index], &tmp, sizeof(tmp)); + index += sizeof(tmp); + s->fin_chunks_ross_sample = 0; - return 0; -} -static int dragonfly_hash_func(void *k, int table_size) -{ - struct dfly_hash_key *tmp = (struct dfly_hash_key *) k; - uint32_t pc = 0, pb = 0; - bj_hashlittle2(tmp, sizeof(*tmp), &pc, &pb); - return (int) (pc % (table_size - 1)); - /*uint64_t key = (~tmp->message_id) + (tmp->message_id << 18); - key = key * 21; - key = ~key ^ (tmp->sender_id >> 4); - key = key * tmp->sender_id; - return (int)(key & (table_size - 1));*/ -} + tmp = s->data_size_ross_sample; + memcpy(&buffer[index], &tmp, sizeof(tmp)); + index += sizeof(tmp); + s->data_size_ross_sample = 0; -/* convert GiB/s and bytes to ns */ -static tw_stime bytes_to_ns(uint64_t bytes, double GB_p_s) -{ - tw_stime time; + tmp = s->fin_hops_ross_sample; + memcpy(&buffer[index], &tmp, sizeof(tmp)); + index += sizeof(tmp); + s->fin_hops_ross_sample = 0; - /* bytes to GB */ - time = ((double) bytes) / (1024.0 * 1024.0 * 1024.0); - /* GiB to s */ - time = time / GB_p_s; - /* s to ns */ - time = time * 1000.0 * 1000.0 * 1000.0; + tmp2 = s->fin_chunks_time_ross_sample; + memcpy(&buffer[index], &tmp2, sizeof(tmp2)); + index += sizeof(tmp2); + s->fin_chunks_time_ross_sample = 0; - return (time); -} + tmp2 = s->busy_time_ross_sample; + memcpy(&buffer[index], &tmp2, sizeof(tmp2)); + index += sizeof(tmp2); + s->busy_time_ross_sample = 0; -/* returns the dragonfly message size */ -int dragonfly_plus_get_msg_sz(void) -{ - return 
sizeof(terminal_plus_message); + return; } -static void free_tmp(void *ptr) +// GVT-based and real time sampling callback for routers +void dfly_plus_router_model_stat_collect(router_state *s, tw_lp *lp, char *buffer) { - struct dfly_qhash_entry *dfly = (dfly_qhash_entry *) ptr; - if (dfly->remote_event_data) - free(dfly->remote_event_data); + (void)lp; - if (dfly) - free(dfly); -} + const dragonfly_plus_param * p = s->params; + int i, index = 0; -static void append_to_terminal_plus_message_list(terminal_plus_message_list **thisq, - terminal_plus_message_list **thistail, - int index, - terminal_plus_message_list *msg) -{ - if (thisq[index] == NULL) { - thisq[index] = msg; - } - else { - thistail[index]->next = msg; - msg->prev = thistail[index]; + tw_lpid id = 0; + tw_stime tmp = 0; + int64_t tmp2 = 0; + + id = s->router_id; + memcpy(&buffer[index], &id, sizeof(id)); + index += sizeof(id); + + for(i = 0; i < p->radix; i++) + { + tmp = s->busy_time_ross_sample[i]; + memcpy(&buffer[index], &tmp, sizeof(tmp)); + index += sizeof(tmp); + s->busy_time_ross_sample[i] = 0; + + tmp2 = s->link_traffic_ross_sample[i]; + memcpy(&buffer[index], &tmp2, sizeof(tmp2)); + index += sizeof(tmp2); + s->link_traffic_ross_sample[i] = 0; } - thistail[index] = msg; + return; } -static void prepend_to_terminal_plus_message_list(terminal_plus_message_list **thisq, - terminal_plus_message_list **thistail, - int index, - terminal_plus_message_list *msg) +static const st_model_types *dfly_plus_get_model_types(void) { - if (thisq[index] == NULL) { - thistail[index] = msg; - } - else { - thisq[index]->prev = msg; - msg->next = thisq[index]; - } - thisq[index] = msg; + return(&dfly_plus_model_types[0]); } -static terminal_plus_message_list *return_head(terminal_plus_message_list **thisq, - terminal_plus_message_list **thistail, - int index) +static const st_model_types *dfly_plus_router_get_model_types(void) { - terminal_plus_message_list *head = thisq[index]; - if (head != NULL) { - thisq[index] 
= head->next; - if (head->next != NULL) { - head->next->prev = NULL; - head->next = NULL; - } - else { - thistail[index] = NULL; - } - } - return head; + return(&dfly_plus_model_types[1]); } -static terminal_plus_message_list *return_tail(terminal_plus_message_list **thisq, - terminal_plus_message_list **thistail, - int index) +static void dfly_plus_register_model_types(st_model_types *base_type) { - terminal_plus_message_list *tail = thistail[index]; - assert(tail); - if (tail->prev != NULL) { - tail->prev->next = NULL; - thistail[index] = tail->prev; - tail->prev = NULL; - } - else { - thistail[index] = NULL; - thisq[index] = NULL; - } - return tail; + st_model_type_register(LP_CONFIG_NM_TERM, base_type); } -void dragonfly_plus_print_params(const dragonfly_plus_param *p) +static void dfly_plus_router_register_model_types(st_model_types *base_type) { - printf("\n------------------ Dragonfly Plus Parameters ---------\n"); - printf("\tnum_routers = %d\n",p->num_routers); - printf("\tlocal_bandwidth = %.2f\n",p->local_bandwidth); - printf("\tglobal_bandwidth = %.2f\n",p->global_bandwidth); - printf("\tcn_bandwidth = %.2f\n",p->cn_bandwidth); - printf("\tnum_vcs = %d\n",p->num_vcs); - printf("\tlocal_vc_size = %d\n",p->local_vc_size); - printf("\tglobal_vc_size = %d\n",p->global_vc_size); - printf("\tcn_vc_size = %d\n",p->cn_vc_size); - printf("\tchunk_size = %d\n",p->chunk_size); - printf("\tnum_cn = %d\n",p->num_cn); - printf("\tintra_grp_radix = %d\n",p->intra_grp_radix); - printf("\tnum_qos_levels = %d\n",p->num_qos_levels); - printf("\tnum_router_spine = %d\n",p->num_router_spine); - printf("\tnum_router_leaf = %d\n",p->num_router_leaf); - printf("\tmax_port_score = %ld\n",p->max_port_score); - printf("\tnum_groups = %d\n",p->num_groups); - printf("\tvirtual radix = %d\n",p->radix); - printf("\ttotal_routers = %d\n",p->total_routers); - printf("\ttotal_terminals = %d\n",p->total_terminals); - printf("\tnum_global_connections = %d\n",p->num_global_connections); - 
printf("\tcn_delay = %.2f\n",p->cn_delay); - printf("\tlocal_delay = %.2f\n",p->local_delay); - printf("\tglobal_delay = %.2f\n",p->global_delay); - printf("\tcredit_delay = %.2f\n",p->credit_delay); - printf("\trouter_delay = %.2f\n",p->router_delay); - printf("\tscoring = %d\n",scoring); - printf("\tadaptive_threshold = %d\n",p->adaptive_threshold); - printf("\trouting = %d\n",routing); - printf("\tsource_leaf_consider_nonmin = %s\n", (p->source_leaf_consider_nonmin ? "true" : "false")); - printf("\tint_spine_consider_min = %s\n", (p->int_spine_consider_min ? "true" : "false")); - printf("\tdest_spine_consider_nonmin = %s\n", (p->dest_spine_consider_nonmin ? "true" : "false")); - printf("\tdest_spine_consider_gnonmin = %s\n", (p->dest_spine_consider_global_nonmin ? "true" : "false")); - printf("\tmax hops notification = %d\n",p->max_hops_notify); - printf("------------------------------------------------------\n\n"); + st_model_type_register(LP_CONFIG_NM_ROUT, base_type); } +/*** END of ROSS Instrumentation support */ -static void dragonfly_read_config(const char *anno, dragonfly_plus_param *params) +/* ROSS Instrumentation layer */ +// virtual time sampling callback - router forward +static void ross_dfly_plus_rsample_fn(router_state * s, tw_bf * bf, tw_lp * lp, struct dfly_router_sample *sample) { - /*Adding init for router magic number*/ - uint32_t h1 = 0, h2 = 0; - bj_hashlittle2(LP_METHOD_NM_ROUT, strlen(LP_METHOD_NM_ROUT), &h1, &h2); - router_magic_num = h1 + h2; + (void)lp; + (void)bf; - bj_hashlittle2(LP_METHOD_NM_TERM, strlen(LP_METHOD_NM_TERM), &h1, &h2); - terminal_magic_num = h1 + h2; + const dragonfly_plus_param * p = s->params; + int i = 0; - // shorthand - dragonfly_plus_param *p = params; - int myRank; - MPI_Comm_rank(MPI_COMM_CODES, &myRank); + sample->router_id = s->router_id; + sample->end_time = tw_now(lp); + sample->fwd_events = s->fwd_events; + sample->rev_events = s->rev_events; + sample->busy_time = (tw_stime*)((&sample->rev_events) + 1); 
+ sample->link_traffic_sample = (int64_t*)((&sample->busy_time[0]) + p->radix); - int rc = configuration_get_value_int(&config, "PARAMS", "local_vc_size", anno, &p->local_vc_size); - if (rc) { - p->local_vc_size = 1024; - if(!myRank) - fprintf(stderr, "Buffer size of local channels not specified, setting to %d\n", p->local_vc_size); + for(; i < p->radix; i++) + { + sample->busy_time[i] = s->ross_rsample.busy_time[i]; + sample->link_traffic_sample[i] = s->ross_rsample.link_traffic_sample[i]; } - rc = configuration_get_value_int(&config, "PARAMS", "global_vc_size", anno, &p->global_vc_size); - if (rc) { - p->global_vc_size = 2048; - if(!myRank) - fprintf(stderr, "Buffer size of global channels not specified, setting to %d\n", p->global_vc_size); - } - - rc = configuration_get_value_int(&config, "PARAMS", "num_qos_levels", anno, &p->num_qos_levels); - if(rc) { - p->num_qos_levels = 1; - if(!myRank) - fprintf(stderr, "Number of QOS levels not specified, setting to %d\n", p->num_qos_levels); - } - - char qos_levels_str[MAX_NAME_LENGTH]; - rc = configuration_get_value(&config, "PARAMS", "qos_bandwidth", anno, qos_levels_str, MAX_NAME_LENGTH); - p->qos_bandwidths = (int*)calloc(p->num_qos_levels, sizeof(int)); + /* clear up the current router stats */ + s->fwd_events = 0; + s->rev_events = 0; - if(p->num_qos_levels > 1) + for( i = 0; i < p->radix; i++) { - int total_bw = 0; - char * token; - token = strtok(qos_levels_str, ","); - int i = 0; - while(token != NULL) - { - sscanf(token, "%d", &p->qos_bandwidths[i]); - total_bw += p->qos_bandwidths[i]; - if(p->qos_bandwidths[i] <= 0) - { - tw_error(TW_LOC, "\nInvalid bandwidth levels\n"); - } - i++; - token = strtok(NULL,","); - } - assert(total_bw <= 100); + s->ross_rsample.busy_time[i] = 0; + s->ross_rsample.link_traffic_sample[i] = 0; } - else - p->qos_bandwidths[0] = 100; - - rc = configuration_get_value_double(&config, "PARAMS", "max_qos_monitor", anno, &max_qos_monitor); - if(rc) { - if(!myRank) - fprintf(stderr, 
"Setting max_qos_monitor to %lf\n", max_qos_monitor); - } +} - rc = configuration_get_value_int(&config, "PARAMS", "cn_vc_size", anno, &p->cn_vc_size); - if (rc) { - p->cn_vc_size = 1024; - if(!myRank) - fprintf(stderr, "Buffer size of compute node channels not specified, setting to %d\n", p->cn_vc_size); - } +// virtual time sampling callback - router reverse +static void ross_dfly_plus_rsample_rc_fn(router_state * s, tw_bf * bf, tw_lp * lp, struct dfly_router_sample *sample) +{ + (void)lp; + (void)bf; + + const dragonfly_plus_param * p = s->params; + int i =0; - rc = configuration_get_value_int(&config, "PARAMS", "chunk_size", anno, &p->chunk_size); - if (rc) { - p->chunk_size = 512; - if(!myRank) - fprintf(stderr, "Chunk size for packets is specified, setting to %d\n", p->chunk_size); + for(; i < p->radix; i++) + { + s->ross_rsample.busy_time[i] = sample->busy_time[i]; + s->ross_rsample.link_traffic_sample[i] = sample->link_traffic_sample[i]; } - rc = configuration_get_value_double(&config, "PARAMS", "local_bandwidth", anno, &p->local_bandwidth); - if (rc) { - p->local_bandwidth = 5.25; - if(!myRank) - fprintf(stderr, "Bandwidth of local channels not specified, setting to %lf\n", p->local_bandwidth); - } + s->fwd_events = sample->fwd_events; + s->rev_events = sample->rev_events; +} - rc = configuration_get_value_double(&config, "PARAMS", "global_bandwidth", anno, &p->global_bandwidth); - if (rc) { - p->global_bandwidth = 4.7; - if(!myRank) - fprintf(stderr, "Bandwidth of global channels not specified, setting to %lf\n", p->global_bandwidth); - } +// virtual time sampling callback - terminal forward +static void ross_dfly_plus_sample_fn(terminal_state * s, tw_bf * bf, tw_lp * lp, struct dfly_cn_sample *sample) +{ + (void)lp; + (void)bf; + + sample->terminal_id = s->terminal_id; + sample->fin_chunks_sample = s->ross_sample.fin_chunks_sample; + sample->data_size_sample = s->ross_sample.data_size_sample; + sample->fin_hops_sample = s->ross_sample.fin_hops_sample; + 
sample->fin_chunks_time = s->ross_sample.fin_chunks_time; + sample->busy_time_sample = s->ross_sample.busy_time_sample; + sample->end_time = tw_now(lp); + sample->fwd_events = s->fwd_events; + sample->rev_events = s->rev_events; - rc = configuration_get_value_double(&config, "PARAMS", "cn_bandwidth", anno, &p->cn_bandwidth); - if (rc) { - p->cn_bandwidth = 5.25; - if(!myRank) - fprintf(stderr, "Bandwidth of compute node channels not specified, setting to %lf\n", p->cn_bandwidth); - } + s->ross_sample.fin_chunks_sample = 0; + s->ross_sample.data_size_sample = 0; + s->ross_sample.fin_hops_sample = 0; + s->fwd_events = 0; + s->rev_events = 0; + s->ross_sample.fin_chunks_time = 0; + s->ross_sample.busy_time_sample = 0; +} - rc = configuration_get_value_double(&config, "PARAMS", "router_delay", anno, &p->router_delay); - if (rc) { - p->router_delay = 100; - } +// virtual time sampling callback - terminal reverse +static void ross_dfly_plus_sample_rc_fn(terminal_state * s, tw_bf * bf, tw_lp * lp, struct dfly_cn_sample *sample) +{ + (void)lp; + (void)bf; - configuration_get_value(&config, "PARAMS", "cn_sample_file", anno, cn_sample_file, MAX_NAME_LENGTH); - configuration_get_value(&config, "PARAMS", "rt_sample_file", anno, router_sample_file, MAX_NAME_LENGTH); + s->ross_sample.busy_time_sample = sample->busy_time_sample; + s->ross_sample.fin_chunks_time = sample->fin_chunks_time; + s->ross_sample.fin_hops_sample = sample->fin_hops_sample; + s->ross_sample.data_size_sample = sample->data_size_sample; + s->ross_sample.fin_chunks_sample = sample->fin_chunks_sample; + s->fwd_events = sample->fwd_events; + s->rev_events = sample->rev_events; +} - char routing_str[MAX_NAME_LENGTH]; - configuration_get_value(&config, "PARAMS", "routing", anno, routing_str, MAX_NAME_LENGTH); - if (strcmp(routing_str, "minimal") == 0) - routing = MINIMAL; - else if (strcmp(routing_str, "non-minimal-spine") == 0) - routing = NON_MINIMAL_SPINE; - else if (strcmp(routing_str, "non-minimal-leaf") == 
0) - routing = NON_MINIMAL_LEAF; - else if (strcmp(routing_str, "prog-adaptive") == 0) - routing = PROG_ADAPTIVE; - else if (strcmp(routing_str, "fully-prog-adaptive") == 0) - routing = FULLY_PROG_ADAPTIVE; - else { - if(!myRank) - fprintf(stderr, "No routing protocol specified, setting to minimal routing\n"); - routing = MINIMAL; - } +void dragonfly_plus_rsample_init(router_state *s, tw_lp *lp) +{ + (void) lp; + int i = 0; + const dragonfly_plus_param *p = s->params; - rc = configuration_get_value_int(&config, "PARAMS", "notification_on_hops_greater_than", anno, &p->max_hops_notify); - if (rc) { - if(!myRank) - fprintf(stderr, "Maximum hops for notifying not specified, setting to INT MAX\n"); - p->max_hops_notify = INT_MAX; - } + assert(p->radix); - int src_leaf_cons_choice; - rc = configuration_get_value_int(&config, "PARAMS", "source_leaf_consider_nonmin", anno, &src_leaf_cons_choice); - if (rc) { - // fprintf(stderr, "Source leaf consideration of nonmin ports not specified. Defaulting to True\n"); - p->source_leaf_consider_nonmin = true; - } - else if (src_leaf_cons_choice == 1) { - p->source_leaf_consider_nonmin = true; + s->max_arr_size = MAX_STATS; + s->rsamples = (struct dfly_router_sample *) calloc(MAX_STATS, sizeof(struct dfly_router_sample)); + for (; i < s->max_arr_size; i++) { + s->rsamples[i].busy_time = (tw_stime *) calloc(p->radix, sizeof(tw_stime)); + s->rsamples[i].link_traffic_sample = (int64_t *) calloc(p->radix, sizeof(int64_t)); } - else - p->source_leaf_consider_nonmin = false; +} +void dragonfly_plus_rsample_rc_fn(router_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp) +{ + (void) bf; + (void) lp; + (void) msg; + s->op_arr_size--; + int cur_indx = s->op_arr_size; + struct dfly_router_sample stat = s->rsamples[cur_indx]; - int int_spn_cons_choice; - rc = configuration_get_value_int(&config, "PARAMS", "int_spine_consider_min", anno, &int_spn_cons_choice); - if (rc) { - // fprintf(stderr, "Int spine consideration of min ports not 
specified. Defaulting to False\n"); - p->int_spine_consider_min = false; - } - else if (int_spn_cons_choice == 1) { - p->int_spine_consider_min = true; - } - else - p->int_spine_consider_min = false; + const dragonfly_plus_param *p = s->params; + int i = 0; - int dst_spn_cons_choice; - rc = configuration_get_value_int(&config, "PARAMS", "dest_spine_consider_nonmin", anno, &dst_spn_cons_choice); - if (rc) { - // fprintf(stderr, "Dest spine consideration of nonmin ports not specified. Defaulting to False\n"); - p->dest_spine_consider_nonmin = false; - } - else if (dst_spn_cons_choice == 1) { - p->dest_spine_consider_nonmin = true; + for (; i < p->radix; i++) { + s->busy_time_sample[i] = stat.busy_time[i]; + s->link_traffic_sample[i] = stat.link_traffic_sample[i]; } - else - p->dest_spine_consider_nonmin = false; - - int dst_spn_gcons_choice; - rc = configuration_get_value_int(&config, "PARAMS", "dest_spine_consider_global_nonmin", anno, &dst_spn_gcons_choice); - if (rc) { - // fprintf(stderr, "Dest spine consideration of global nonmin ports not specified. 
Defaulting to True\n"); - p->dest_spine_consider_global_nonmin = true; - } - else if (dst_spn_gcons_choice == 1) { - p->dest_spine_consider_global_nonmin = true; + for (i = 0; i < p->radix; i++) { + stat.busy_time[i] = 0; + stat.link_traffic_sample[i] = 0; } - else - p->dest_spine_consider_global_nonmin = false; + s->fwd_events = stat.fwd_events; + s->rev_events = stat.rev_events; +} +void dragonfly_plus_rsample_fn(router_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp) +{ + (void) bf; + (void) lp; + (void) msg; - /* MM: This should be 2 for dragonfly plus*/ - p->num_vcs = 2; - - if(p->num_qos_levels > 1) - p->num_vcs = p->num_qos_levels * p->num_vcs; + const dragonfly_plus_param *p = s->params; - rc = configuration_get_value_int(&config, "PARAMS", "num_groups", anno, &p->num_groups); - if (rc) { - tw_error(TW_LOC, "\nnum_groups not specified, Aborting\n"); - } - rc = configuration_get_value_int(&config, "PARAMS", "num_router_spine", anno, &p->num_router_spine); - if (rc) { - tw_error(TW_LOC, "\nnum_router_spine not specified, Aborting\n"); - } - rc = configuration_get_value_int(&config, "PARAMS", "num_router_leaf", anno, &p->num_router_leaf); - if (rc) { - tw_error(TW_LOC, "\nnum_router_leaf not specified, Aborting\n"); + if (s->op_arr_size >= s->max_arr_size) { + struct dfly_router_sample *tmp = + (dfly_router_sample *) calloc((MAX_STATS + s->max_arr_size), sizeof(struct dfly_router_sample)); + memcpy(tmp, s->rsamples, s->op_arr_size * sizeof(struct dfly_router_sample)); + free(s->rsamples); + s->rsamples = tmp; + s->max_arr_size += MAX_STATS; } - p->num_routers = p->num_router_spine + p->num_router_leaf; // num routers per group - p->intra_grp_radix = max(p->num_router_spine, p->num_router_leaf); //TODO: Is this sufficient? If there are parallel intra connecitons, this will break. 
+ int i = 0; + int cur_indx = s->op_arr_size; - rc = configuration_get_value_int(&config, "PARAMS", "num_cns_per_router", anno, &p->num_cn); - if (rc) { - if(!myRank) - fprintf(stderr,"Number of cns per router not specified, setting to %d\n", 4); - p->num_cn = 4; - } + s->rsamples[cur_indx].router_id = s->router_id; + s->rsamples[cur_indx].end_time = tw_now(lp); + s->rsamples[cur_indx].fwd_events = s->fwd_events; + s->rsamples[cur_indx].rev_events = s->rev_events; - rc = configuration_get_value_int(&config, "PARAMS", "num_global_connections", anno, &p->num_global_connections); - if (rc) { - tw_error(TW_LOC, "\nnum_global_connections per router not specified, abortin..."); + for (; i < p->radix; i++) { + s->rsamples[cur_indx].busy_time[i] = s->busy_time_sample[i]; + s->rsamples[cur_indx].link_traffic_sample[i] = s->link_traffic_sample[i]; } - p->radix = p->intra_grp_radix + p->num_global_connections + - p->num_cn; // TODO this may not be sufficient, radix isn't same for leaf and spine routers - p->total_routers = p->num_groups * p->num_routers; - p->total_terminals = (p->num_groups * p->num_router_leaf) * p->num_cn; - char scoring_str[MAX_NAME_LENGTH]; - configuration_get_value(&config, "PARAMS", "route_scoring_metric", anno, scoring_str, MAX_NAME_LENGTH); - if (strcmp(scoring_str, "alpha") == 0) { - scoring = ALPHA; - scoring_preference = LOWER; - } - else if (strcmp(scoring_str, "beta") == 0) { - scoring = BETA; - scoring_preference = LOWER; - } - else if (strcmp(scoring_str, "gamma") == 0) { - tw_error(TW_LOC, "Gamma scoring protocol currently non-functional"); //TODO: Fix gamma scoring protocol - scoring = GAMMA; - scoring_preference = HIGHER; - } - else if (strcmp(scoring_str, "delta") == 0) { - scoring = DELTA; - scoring_preference = LOWER; - } - else { - if(!myRank) - fprintf(stderr, "No route scoring protocol specified, setting to alpha scoring\n"); - scoring = ALPHA; - scoring_preference = LOWER; - } + s->op_arr_size++; - rc = 
configuration_get_value_int(&config, "PARAMS", "adaptive_threshold", anno, &p->adaptive_threshold); - if (rc) { - if(!myRank) - fprintf(stderr, "Adaptive Minimal Routing Threshold not specified: setting to default = 0. (Will consider minimal and nonminimal routes based on scoring metric alone)\n"); - p->adaptive_threshold = 0; - } + /* clear up the current router stats */ + s->fwd_events = 0; + s->rev_events = 0; + for (i = 0; i < p->radix; i++) { + s->busy_time_sample[i] = 0; + s->link_traffic_sample[i] = 0; + } +} - int largest_vc_size = 0; - if (p->local_vc_size > largest_vc_size) - largest_vc_size = p->local_vc_size; - if (p->global_vc_size > largest_vc_size) - largest_vc_size = p->global_vc_size; - if (p->cn_vc_size > largest_vc_size) - largest_vc_size = p->cn_vc_size; +void dragonfly_plus_rsample_fin(router_state *s, tw_lp *lp) +{ + (void) lp; + const dragonfly_plus_param *p = s->params; - p->max_port_score = (p->num_vcs * largest_vc_size) + largest_vc_size; //The maximum score that a port can get during the scoring metrics. + if (s->router_id == 0) { + /* write metadata file */ + char meta_fname[64]; + sprintf(meta_fname, "dragonfly-router-sampling.meta"); - // read intra group connections, store from a router's perspective - // all links to the same router form a vector - char intraFile[MAX_NAME_LENGTH]; - configuration_get_value(&config, "PARAMS", "intra-group-connections", anno, intraFile, MAX_NAME_LENGTH); - if (strlen(intraFile) <= 0) { - tw_error(TW_LOC, "\nIntra group connections file not specified. 
Aborting\n"); + FILE *fp = fopen(meta_fname, "w"); + fprintf(fp, + "Router sample struct format: \nrouter_id (tw_lpid) \nbusy time for each of the %d links " + "(double) \n" + "link traffic for each of the %d links (int64_t) \nsample end time (double) forward events " + "per sample \nreverse events per sample ", + p->radix, p->radix); + fclose(fp); } + char rt_fn[MAX_NAME_LENGTH]; + if (strcmp(router_sample_file, "") == 0) + sprintf(rt_fn, "dragonfly-router-sampling-%ld.bin", g_tw_mynode); + else + sprintf(rt_fn, "%s-%ld.bin", router_sample_file, g_tw_mynode); - //setup Connection Managers for each router - for(int i = 0; i < p->total_routers; i++) - { - int src_id_global = i; - int src_id_local = i % p->num_routers; - int src_group = i / p->num_routers; + int i = 0; - ConnectionManager conman = ConnectionManager(src_id_local, src_id_global, src_group, p->intra_grp_radix, p->num_global_connections, p->num_cn, p->num_routers); - connManagerList.push_back(conman); + int size_sample = sizeof(tw_lpid) + p->radix * (sizeof(int64_t) + sizeof(tw_stime)) + + sizeof(tw_stime) + 2 * sizeof(long); + FILE *fp = fopen(rt_fn, "a"); + fseek(fp, sample_rtr_bytes_written, SEEK_SET); + + for (; i < s->op_arr_size; i++) { + fwrite((void *) &(s->rsamples[i].router_id), sizeof(tw_lpid), 1, fp); + fwrite(s->rsamples[i].busy_time, sizeof(tw_stime), p->radix, fp); + fwrite(s->rsamples[i].link_traffic_sample, sizeof(int64_t), p->radix, fp); + fwrite((void *) &(s->rsamples[i].end_time), sizeof(tw_stime), 1, fp); + fwrite((void *) &(s->rsamples[i].fwd_events), sizeof(long), 1, fp); + fwrite((void *) &(s->rsamples[i].rev_events), sizeof(long), 1, fp); } + sample_rtr_bytes_written += (s->op_arr_size * size_sample); + fclose(fp); +} +void dragonfly_plus_sample_init(terminal_state *s, tw_lp *lp) +{ + (void) lp; + s->fin_chunks_sample = 0; + s->data_size_sample = 0; + s->fin_hops_sample = 0; + s->fin_chunks_time = 0; + s->busy_time_sample = 0; - FILE *groupFile = fopen(intraFile, "rb"); - if 
(!groupFile) - tw_error(TW_LOC, "\nintra-group file not found\n"); + s->op_arr_size = 0; + s->max_arr_size = MAX_STATS; - IntraGroupLink newLink; - while (fread(&newLink, sizeof(IntraGroupLink), 1, groupFile) != 0) { - int src_id_local = newLink.src; - int dest_id_local = newLink.dest; - for(int i = 0; i < p->total_routers; i++) - { - int group_id = i/p->num_routers; - if (i % p->num_routers == src_id_local) - { - int dest_id_global = group_id * p->num_routers + dest_id_local; - connManagerList[i].add_connection(dest_id_global, CONN_LOCAL); - } - } - } - fclose(groupFile); + s->sample_stat = (dfly_cn_sample *) calloc(MAX_STATS, sizeof(struct dfly_cn_sample)); +} +void dragonfly_plus_sample_rc_fn(terminal_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp) +{ + (void) lp; + (void) bf; + (void) msg; - //terminal assignment! - for(int i = 0; i < p->total_terminals; i++) - { - int assigned_router_id = dragonfly_plus_get_assigned_router_id(i, p); - int assigned_group_id = assigned_router_id / p->num_routers; - connManagerList[assigned_router_id].add_connection(i, CONN_TERMINAL); - } + s->op_arr_size--; + int cur_indx = s->op_arr_size; + struct dfly_cn_sample stat = s->sample_stat[cur_indx]; + s->busy_time_sample = stat.busy_time_sample; + s->fin_chunks_time = stat.fin_chunks_time; + s->fin_hops_sample = stat.fin_hops_sample; + s->data_size_sample = stat.data_size_sample; + s->fin_chunks_sample = stat.fin_chunks_sample; + s->fwd_events = stat.fwd_events; + s->rev_events = stat.rev_events; - // read inter group connections, store from a router's perspective - // also create a group level table that tells all the connecting routers - char interFile[MAX_NAME_LENGTH]; - configuration_get_value(&config, "PARAMS", "inter-group-connections", anno, interFile, MAX_NAME_LENGTH); - if (strlen(interFile) <= 0) { - tw_error(TW_LOC, "\nInter group connections file not specified. 
Aborting\n"); - } - FILE *systemFile = fopen(interFile, "rb"); - if (!myRank) { - printf("Reading inter-group connectivity file: %s\n", interFile); - printf("\nTotal routers: %d; total groups: %d \n", p->total_routers, p->num_groups); - } + stat.busy_time_sample = 0; + stat.fin_chunks_time = 0; + stat.fin_hops_sample = 0; + stat.data_size_sample = 0; + stat.fin_chunks_sample = 0; + stat.end_time = 0; + stat.terminal_id = 0; + stat.fwd_events = 0; + stat.rev_events = 0; +} - connectionList.resize(p->num_groups); - for (int g = 0; g < connectionList.size(); g++) { - connectionList[g].resize(p->num_groups); +void dragonfly_plus_sample_fn(terminal_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp) +{ + (void) lp; + (void) msg; + (void) bf; + + if (s->op_arr_size >= s->max_arr_size) { + /* In the worst case, copy array to a new memory location, its very + * expensive operation though */ + struct dfly_cn_sample *tmp = + (dfly_cn_sample *) calloc((MAX_STATS + s->max_arr_size), sizeof(struct dfly_cn_sample)); + memcpy(tmp, s->sample_stat, s->op_arr_size * sizeof(struct dfly_cn_sample)); + free(s->sample_stat); + s->sample_stat = tmp; + s->max_arr_size += MAX_STATS; } - InterGroupLink newInterLink; - while (fread(&newInterLink, sizeof(InterGroupLink), 1, systemFile) != 0) { - int src_id_global = newInterLink.src; - int src_group_id = src_id_global / p->num_routers; - int dest_id_global = newInterLink.dest; - int dest_group_id = dest_id_global / p->num_routers; + int cur_indx = s->op_arr_size; - // printf("[%d -> %d]\n",src_id_global, dest_id_global); - connManagerList[src_id_global].add_connection(dest_id_global, CONN_GLOBAL); + s->sample_stat[cur_indx].terminal_id = s->terminal_id; + s->sample_stat[cur_indx].fin_chunks_sample = s->fin_chunks_sample; + s->sample_stat[cur_indx].data_size_sample = s->data_size_sample; + s->sample_stat[cur_indx].fin_hops_sample = s->fin_hops_sample; + s->sample_stat[cur_indx].fin_chunks_time = s->fin_chunks_time; + 
s->sample_stat[cur_indx].busy_time_sample = s->busy_time_sample; + s->sample_stat[cur_indx].end_time = tw_now(lp); + s->sample_stat[cur_indx].fwd_events = s->fwd_events; + s->sample_stat[cur_indx].rev_events = s->rev_events; - int r; - for (r = 0; r < connectionList[src_group_id][dest_group_id].size(); r++) { - if (connectionList[src_group_id][dest_group_id][r] == newInterLink.src) - break; - } - if (r == connectionList[src_group_id][dest_group_id].size()) { - connectionList[src_group_id][dest_group_id].push_back(newInterLink.src); - } - } + s->op_arr_size++; + s->fin_chunks_sample = 0; + s->data_size_sample = 0; + s->fin_hops_sample = 0; + s->fwd_events = 0; + s->rev_events = 0; + s->fin_chunks_time = 0; + s->busy_time_sample = 0; +} - if (DUMP_CONNECTIONS) - { - if (!myRank) { - for(int i=0; i < connManagerList.size(); i++) - { - connManagerList[i].print_connections(); - } - } - } +void dragonfly_plus_sample_fin(terminal_state *s, tw_lp *lp) +{ + (void) lp; - for(int i = 0; i < p->total_routers; i++){ - int loc_id = i % p->num_routers; - if (loc_id < p->num_router_leaf) - router_type_map[i] = LEAF; - else - router_type_map[i] = SPINE; - } + if (!g_tw_mynode) { + /* write metadata file */ + char meta_fname[64]; + sprintf(meta_fname, "dragonfly-cn-sampling.meta"); - - if (!myRank) { - printf("\nTotal nodes: %d, Total routers: %d, Num groups: %d, Routers per group: %d, Virtual radix: %d\n", - p->num_cn * p->num_router_leaf * p->num_groups, p->total_routers, p->num_groups, p->num_routers, p->radix); + FILE *fp = fopen(meta_fname, "w"); + fprintf( + fp, + "Compute node sample format\nterminal_id (tw_lpid) \nfinished chunks (long)" + "\ndata size per sample (long) \nfinished hops (double) \ntime to finish chunks (double)" + "\nbusy time (double)\nsample end time(double) \nforward events (long) \nreverse events (long)"); + fclose(fp); } - p->cn_delay = bytes_to_ns(p->chunk_size, p->cn_bandwidth); - p->local_delay = bytes_to_ns(p->chunk_size, p->local_bandwidth); - 
p->global_delay = bytes_to_ns(p->chunk_size, p->global_bandwidth); - p->credit_delay = bytes_to_ns(CREDIT_SIZE, p->local_bandwidth); // assume 8 bytes packet + char rt_fn[MAX_NAME_LENGTH]; + if (strncmp(cn_sample_file, "", 10) == 0) + sprintf(rt_fn, "dragonfly-cn-sampling-%ld.bin", g_tw_mynode); + else + sprintf(rt_fn, "%s-%ld.bin", cn_sample_file, g_tw_mynode); - fflush(stderr); - fflush(stdout); + FILE *fp = fopen(rt_fn, "a"); + fseek(fp, sample_bytes_written, SEEK_SET); + fwrite(s->sample_stat, sizeof(struct dfly_cn_sample), s->op_arr_size, fp); + fclose(fp); - if (PRINT_CONFIG) { - if(!myRank) - dragonfly_plus_print_params(p); - } - stored_params = p; + sample_bytes_written += (s->op_arr_size * sizeof(struct dfly_cn_sample)); } -void dragonfly_plus_configure() -{ - anno_map = codes_mapping_get_lp_anno_map(LP_CONFIG_NM_TERM); - assert(anno_map); - num_params = anno_map->num_annos + (anno_map->has_unanno_lp > 0); - all_params = (dragonfly_plus_param *) calloc(num_params, sizeof(*all_params)); +int dragonfly_plus_get_assigned_router_id(int terminal_id, const dragonfly_plus_param *p); - for (int i = 0; i < anno_map->num_annos; i++) { - const char *anno = anno_map->annotations[i].ptr; - dragonfly_read_config(anno, &all_params[i]); - } - if (anno_map->has_unanno_lp > 0) { - dragonfly_read_config(NULL, &all_params[anno_map->num_annos]); - } -#ifdef ENABLE_CORTEX -// model_net_topology = dragonfly_plus_cortex_topology; -#endif -} +static short routing = MINIMAL; +static short scoring = ALPHA; +static short scoring_preference = LOWER; -/* report dragonfly statistics like average and maximum packet latency, average number of hops traversed */ -void dragonfly_plus_report_stats() -{ - long long avg_hops, total_finished_packets, total_finished_chunks; - long long total_finished_msgs, final_msg_sz; - tw_stime avg_time, max_time; - int total_minimal_packets, total_nonmin_packets; - long total_gen, total_fin; - long total_local_packets_sr, total_local_packets_sg, 
total_remote_packets; +/*Routing Implementation Declarations*/ +static Connection do_dfp_prog_adaptive_routing(router_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp, int fdest_router_id); - MPI_Reduce(&total_hops, &avg_hops, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES); - MPI_Reduce(&N_finished_packets, &total_finished_packets, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES); - MPI_Reduce(&N_finished_msgs, &total_finished_msgs, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES); - MPI_Reduce(&N_finished_chunks, &total_finished_chunks, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES); - MPI_Reduce(&total_msg_sz, &final_msg_sz, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES); - MPI_Reduce(&dragonfly_total_time, &avg_time, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_CODES); - MPI_Reduce(&dragonfly_max_latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_CODES); +/*Routing Helper Declarations*/ +static int get_min_hops_to_dest_from_conn(router_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp, Connection conn); +static vector< Connection > get_legal_minimal_stops(router_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp, int fdest_router_id); +static vector< Connection > get_legal_nonminimal_stops(router_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp, vector< Connection > possible_minimal_stops, int fdest_router_id); - MPI_Reduce(&packet_gen, &total_gen, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_CODES); - MPI_Reduce(&packet_fin, &total_fin, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_CODES); - MPI_Reduce( &num_local_packets_sr, &total_local_packets_sr, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_CODES); - MPI_Reduce( &num_local_packets_sg, &total_local_packets_sg, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_CODES); - MPI_Reduce( &num_remote_packets, &total_remote_packets, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_CODES); - if(isRoutingAdaptive(routing) || SHOW_ADAPTIVE_STATS) { - MPI_Reduce(&minimal_count, &total_minimal_packets, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_CODES); - 
MPI_Reduce(&nonmin_count, &total_nonmin_packets, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_CODES); - } - /* print statistics */ - if (!g_tw_mynode) { - if (PRINT_CONFIG) - dragonfly_plus_print_params(stored_params); +static tw_stime dragonfly_total_time = 0; +static tw_stime dragonfly_max_latency = 0; - printf( - "Average number of router hops traversed: %f; average chunk latency: %lf us; maximum chunk latency: %lf us; " - "avg message size: %lf bytes; finished messages: %lld; finished chunks: %lld \n", - (float) avg_hops / total_finished_chunks, avg_time / (total_finished_chunks * 1000), - max_time / 1000, (float) final_msg_sz / total_finished_msgs, total_finished_msgs, - total_finished_chunks); - if(isRoutingAdaptive(routing) || SHOW_ADAPTIVE_STATS) { - printf("\nADAPTIVE ROUTING STATS: %d chunks routed minimally %d chunks routed non-minimally - completed packets: %lld \n", - total_minimal_packets, total_nonmin_packets, total_finished_chunks); - } - printf("\nTotal packets generated: %ld; finished: %ld; Locally routed: same router: %ld, different-router: %ld; Remote (inter-group): %ld \n", total_gen, total_fin, total_local_packets_sr, total_local_packets_sg, total_remote_packets); - } - return; -} +static long long total_hops = 0; +static long long N_finished_packets = 0; +static long long total_msg_sz = 0; +static long long N_finished_msgs = 0; +static long long N_finished_chunks = 0; -int dragonfly_plus_get_router_type(int router_id, const dragonfly_plus_param *p) +/* convert GiB/s and bytes to ns */ +static tw_stime bytes_to_ns(uint64_t bytes, double GB_p_s) { - int num_groups = p->num_groups; - int num_routers = p->num_routers; - int num_router_leaf = p->num_router_leaf; + tw_stime time; - int group_id = router_id / num_groups; - int router_local_id = router_id % num_routers; + /* bytes to GB */ + time = ((double) bytes) / (1024.0 * 1024.0 * 1024.0); + /* GiB to s */ + time = time / GB_p_s; + /* s to ns */ + time = time * 1000.0 * 1000.0 * 1000.0; - if 
(router_local_id > num_router_leaf) - return SPINE; - else - return LEAF; + return (time); } -/* get the router id associated with a given terminal id */ -int dragonfly_plus_get_assigned_router_id(int terminal_id, const dragonfly_plus_param *p) +static int dragonfly_rank_hash_compare(void *key, struct qhash_head *link) { - // currently supports symmetrical bipartite spine/leaf router configurations - // first half of routers in a given group are leafs which have terminals - // second half of routers in a given group are spines which have no terminals - int num_groups = p->num_groups; // number of groups of routers in the network - int num_routers = p->num_routers; // num routers per group - int num_router_leaf = p->num_router_leaf; // num leaf routers per group - int num_cn = p->num_cn; // num compute nodes per leaf router - int num_cn_per_group = (num_router_leaf * num_cn); + struct dfly_hash_key *message_key = (struct dfly_hash_key *) key; + struct dfly_qhash_entry *tmp = NULL; - int group_id = terminal_id / num_cn_per_group; - int local_router_id = (terminal_id / num_cn) % num_router_leaf; - int router_id = (group_id * num_routers) + local_router_id; + tmp = qhash_entry(link, struct dfly_qhash_entry, hash_link); - return router_id; -} + if (tmp->key.message_id == message_key->message_id && tmp->key.sender_id == message_key->sender_id) + return 1; -void router_plus_commit(router_state * s, - tw_bf * bf, - terminal_plus_message * msg, - tw_lp * lp) -{ - if(msg->type == R_BANDWIDTH) - { - if(msg->rc_is_qos_set == 1) { - free(msg->rc_qos_data); - free(msg->rc_qos_status); - msg->rc_is_qos_set = 0; - } - } + return 0; } -void terminal_plus_commit(terminal_state * s, - tw_bf * bf, - terminal_plus_message * msg, - tw_lp * lp) +static int dragonfly_hash_func(void *k, int table_size) { - if(msg->type == T_BANDWIDTH) - { - if(msg->rc_is_qos_set == 1) { - free(msg->rc_qos_data); - free(msg->rc_qos_status); - msg->rc_is_qos_set = 0; - } - } + struct dfly_hash_key *tmp = 
(struct dfly_hash_key *) k; + uint32_t pc = 0, pb = 0; + bj_hashlittle2(tmp, sizeof(*tmp), &pc, &pb); + return (int) (pc % (table_size - 1)); + /*uint64_t key = (~tmp->message_id) + (tmp->message_id << 18); + key = key * 21; + key = ~key ^ (tmp->sender_id >> 4); + key = key * tmp->sender_id; + return (int)(key & (table_size - 1));*/ } -/* initialize a dragonfly compute node terminal */ -void terminal_plus_init(terminal_state *s, tw_lp *lp) -{ - // printf("%d: Terminal Init()\n",lp->gid); - s->packet_gen = 0; - s->packet_fin = 0; - s->is_monitoring_bw = 0; - - int i; - char anno[MAX_NAME_LENGTH]; - // Assign the global router ID - // TODO: be annotation-aware - codes_mapping_get_lp_info(lp->gid, lp_group_name, &mapping_grp_id, NULL, &mapping_type_id, anno, - &mapping_rep_id, &mapping_offset); - if (anno[0] == '\0') { - s->anno = NULL; - s->params = &all_params[num_params - 1]; - } - else { - s->anno = strdup(anno); - int id = configuration_get_annotation_index(anno, anno_map); - s->params = &all_params[id]; - } +/* returns the dragonfly message size */ +int dragonfly_plus_get_msg_sz(void) +{ + return sizeof(terminal_plus_message); +} - int num_qos_levels = s->params->num_qos_levels; - int num_lps = codes_mapping_get_lp_count(lp_group_name, 1, LP_CONFIG_NM_TERM, s->anno, 0); - +static void free_tmp(void *ptr) +{ + struct dfly_qhash_entry *dfly = (dfly_qhash_entry *) ptr; + if (dfly->remote_event_data) + free(dfly->remote_event_data); - s->terminal_id = codes_mapping_get_lp_relative_id(lp->gid, 0, 0); - s->router_id = dragonfly_plus_get_assigned_router_id(s->terminal_id, s->params); - // s->router_id=(int)s->terminal_id / (s->params->num_cn); //TODO I think this is where the router that - // the terminal is connected to is specified + if (dfly) + free(dfly); +} - // printf("%d gid is TERMINAL %d with assigned router %d\n",lp->gid,s->terminal_id,s->router_id); - s->terminal_available_time = 0.0; - s->packet_counter = 0; - s->min_latency = INT_MAX; - s->max_latency = 0; 
+/** + * Scores a connection based on the metric provided in the function + * @param isMinimalPort a boolean variable used in the Gamma metric to pass whether a given port would lead to the destination in a minimal way + */ +static int dfp_score_connection(router_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp, Connection conn, conn_minimality_t c_minimality) +{ + int score = 0; //can't forget to initialize this to zero. + int port = conn.port; - s->finished_msgs = 0; - s->finished_chunks = 0; - s->finished_packets = 0; - s->total_time = 0.0; - s->total_msg_size = 0; + if (port == -1) { + if (scoring_preference == LOWER) + return INT_MAX; + else + return 0; + } - s->busy_time = 0.0; + switch(scoring) { + case ALPHA: //considers vc occupancy and queued count only LOWER SCORE IS BETTER + { + for(int k=0; k < s->params->num_vcs; k++) + { + score += s->vc_occupancy[port][k]; + } + score += s->queued_count[port]; + break; + } + case BETA: //consideres vc occupancy and queued count multiplied by the number of minimum hops to the destination LOWER SCORE IS BETTER + { + int base_score = 0; + for(int k=0; k < s->params->num_vcs; k++) + { + base_score += s->vc_occupancy[port][k]; + } + base_score += s->queued_count[port]; + score = base_score * get_min_hops_to_dest_from_conn(s, bf, msg, lp, conn); + break; + } + case GAMMA: //consideres vc occupancy and queue count but ports that follow a minimal path to fdest are biased 2:1 bonus by multiplying minimal by 2 HIGHER SCORE IS BETTER + { + score = s->params->max_port_score; //initialize this to max score. 
+ int to_subtract = 0; + for(int k=0; k < s->params->num_vcs; k++) + { + to_subtract += s->vc_occupancy[port][k]; + } + to_subtract += s->queued_count[port]; + score -= to_subtract; - s->fwd_events = 0; - s->rev_events = 0; + if (c_minimality == C_MIN) //the connection maintains the paths minimality - gets a bonus of 2x + score = score * 2; + break; + } + case DELTA: //consideres vc occupancy and queue count but ports that follow a minimal path to fdest are biased 2:1 through dividing minimal by 2 Lower SCORE IS BETTER + { + for(int k=0; k < s->params->num_vcs; k++) + { + score += s->vc_occupancy[port][k]; + } + score += s->queued_count[port]; - rc_stack_create(&s->st); - - s->num_vcs = 1; - if(num_qos_levels > 1) - s->num_vcs *= num_qos_levels; - - /* Whether the virtual channel group is active or over-bw*/ - s->qos_status = (int*)calloc(num_qos_levels, sizeof(int)); - - /* How much data has been transmitted on the virtual channel group within - * the window */ - s->qos_data = (unsigned long long*)calloc(num_qos_levels, sizeof(unsigned long long)); - s->vc_occupancy = (int*)calloc(s->num_vcs, sizeof(int)); + if (c_minimality != C_MIN) + score = score * 2; + break; + } + default: + tw_error(TW_LOC, "Unsupported Scoring Protocol Error\n"); - - for(i = 0; i < num_qos_levels; i++) - { - s->qos_data[i] = 0; - s->qos_status[i] = Q_ACTIVE; } + return score; +} - for(i = 0; i < s->num_vcs; i++) - { - s->vc_occupancy[i] = 0; +static Connection get_absolute_best_connection_from_conns(router_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp, vector conns) +{ + if (conns.size() == 0) { + Connection bad_conn; + bad_conn.src_gid = -1; + bad_conn.port = -1; + return bad_conn; + } + if (conns.size() == 1) { + return conns[0]; } - s->last_qos_lvl = 0; - s->last_buf_full = 0; + int num_to_compare = conns.size(); - s->rank_tbl = NULL; - s->terminal_msgs = - (terminal_plus_message_list **) calloc(s->num_vcs, sizeof(terminal_plus_message_list *)); - s->terminal_msgs_tail = - 
(terminal_plus_message_list **) calloc(s->num_vcs, sizeof(terminal_plus_message_list *)); + int scores[num_to_compare]; + int best_score_index = 0; + if (scoring_preference == LOWER) { + + int best_score = INT_MAX; + for(int i = 0; i < num_to_compare; i++) + { + scores[i] = dfp_score_connection(s, bf, msg, lp, conns[i], C_MIN); - for(int i = 0; i < s->num_vcs; i++) - { - s->terminal_msgs[i] = NULL; - s->terminal_msgs_tail[i] = NULL; + if (scores[i] < best_score) { + best_score = scores[i]; + best_score_index = i; + } + } } + else { + + int best_score = 0; + for(int i = 0; i < num_to_compare; i++) + { + scores[i] = dfp_score_connection(s, bf, msg, lp, conns[i], C_MIN); - s->terminal_length = (unsigned long long*)calloc(s->num_vcs, sizeof(unsigned long long)); - s->in_send_loop = 0; - s->issueIdle = 0; + if (scores[i] > best_score) { + best_score = scores[i]; + best_score_index = i; + } + } + } - return; + return conns[best_score_index]; } -/* sets up the router virtual channels, global channels, - * local channels, compute node channels */ -void router_plus_setup(router_state *r, tw_lp *lp) +static vector< Connection > dfp_select_two_connections(router_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp, vector< Connection > conns) { - // printf("%d: Router Init()\n",lp->gid); - - char anno[MAX_NAME_LENGTH]; - codes_mapping_get_lp_info(lp->gid, lp_group_name, &mapping_grp_id, NULL, &mapping_type_id, anno, - &mapping_rep_id, &mapping_offset); - - if (anno[0] == '\0') { - r->anno = NULL; - r->params = &all_params[num_params - 1]; - } - else { - r->anno = strdup(anno); - int id = configuration_get_annotation_index(anno, anno_map); - r->params = &all_params[id]; + if(conns.size() < 2) { + if(conns.size() == 1) + return conns; + if(conns.size() == 0) + return vector< Connection>(); } - // shorthand - const dragonfly_plus_param *p = r->params; - - num_routers_per_mgrp = - codes_mapping_get_lp_count(lp_group_name, 1, "modelnet_dragonfly_plus_router", NULL, 0); - int 
num_grp_reps = codes_mapping_get_group_reps(lp_group_name); - if (p->total_routers != num_grp_reps * num_routers_per_mgrp) - tw_error(TW_LOC, - "\n Config error: num_routers specified %d total routers computed in the network %d " - "does not match with repetitions * dragonfly_router %d ", - p->num_routers, p->total_routers, num_grp_reps * num_routers_per_mgrp); + int rand_sel_1, rand_sel_2_offset; - r->router_id = codes_mapping_get_lp_relative_id(lp->gid, 0, 0); - r->group_id = r->router_id / p->num_routers; + int num_conns = conns.size(); - // printf("\n Local router id %d global id %d ", r->router_id, lp->gid); + msg->num_rngs +=2; + rand_sel_1 = tw_rand_integer(lp->rng, 0, num_conns-1); + rand_sel_2_offset = tw_rand_integer(lp->rng, 0, num_conns-1); //number of indices to count up from the previous selected one. Avoids selecting same one twice + int rand_sel_2 = (rand_sel_1 + rand_sel_2_offset) % num_conns; - r->num_rtr_rc_windows = 100; - r->is_monitoring_bw = 0; - r->fwd_events = 0; - r->rev_events = 0; + vector< Connection > retVec; + retVec.push_back(conns[rand_sel_1]); + retVec.push_back(conns[rand_sel_2]); - // QoS related variables - int num_qos_levels = p->num_qos_levels; + return retVec; +} - // Determine if router is a spine or a leaf - int intra_group_id = r->router_id % p->num_routers; - if (intra_group_id >= (p->num_routers / 2)) { //TODO this assumes symmetric spine and leafs - r->dfp_router_type = SPINE; - assert(router_type_map[r->router_id] == SPINE); - // printf("%lu: %i is a SPINE\n",lp->gid, r->router_id); +//two rngs per call +//TODO this defaults to minimality of min, at time of implementation all connections in conns are of same minimality so their scores compared to each other don't matter on minimality +static Connection get_best_connection_from_conns(router_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp, vector conns) +{ + if (conns.size() == 0) { + Connection bad_conn; + bad_conn.src_gid = -1; + bad_conn.port = -1; + return 
bad_conn; } - else { - r->dfp_router_type = LEAF; - assert(router_type_map[r->router_id] == LEAF); - // printf("%lu: %i is a LEAF\n",lp->gid, r->router_id); + if (conns.size() < 2) { + return conns[0]; } -#if DEBUG_QOS == 1 - char rtr_bw_log[128]; - sprintf(rtr_bw_log, "router-bw-tracker-%d", g_tw_mynode); - - if(dragonfly_rtr_bw_log == NULL) - { - dragonfly_rtr_bw_log = fopen(rtr_bw_log, "w+"); - - fprintf(dragonfly_rtr_bw_log, "\n router-id time-stamp port-id qos-level bw-consumed qos-status qos-data busy-time"); - } -#endif - r->connMan = &connManagerList[r->router_id]; - - r->gc_usage = (int *) calloc(p->num_global_connections, sizeof(int)); - - r->global_channel = (int *) calloc(p->num_global_connections, sizeof(int)); - r->next_output_available_time = (tw_stime *) calloc(p->radix, sizeof(tw_stime)); - r->link_traffic = (int64_t *) calloc(p->radix, sizeof(int64_t)); - r->link_traffic_sample = (int64_t *) calloc(p->radix, sizeof(int64_t)); - - r->vc_occupancy = (int **) calloc(p->radix, sizeof(int *)); - r->qos_data = (unsigned long long**)calloc(p->radix, sizeof(unsigned long long*)); - r->last_qos_lvl = (int*)calloc(p->radix, sizeof(int)); - r->qos_status = (int**)calloc(p->radix, sizeof(int*)); - r->in_send_loop = (int *) calloc(p->radix, sizeof(int)); - r->pending_msgs = - (terminal_plus_message_list ***) calloc(p->radix, sizeof(terminal_plus_message_list **)); - r->pending_msgs_tail = - (terminal_plus_message_list ***) calloc(p->radix, sizeof(terminal_plus_message_list **)); - r->queued_msgs = - (terminal_plus_message_list ***) calloc(p->radix, sizeof(terminal_plus_message_list **)); - r->queued_msgs_tail = - (terminal_plus_message_list ***) calloc(p->radix, sizeof(terminal_plus_message_list **)); - r->queued_count = (int *) calloc(p->radix, sizeof(int)); - r->last_buf_full = (tw_stime*) calloc(p->radix, sizeof(tw_stime *)); - r->busy_time = (tw_stime *) calloc(p->radix, sizeof(tw_stime)); - r->busy_time_sample = (tw_stime *) calloc(p->radix, 
sizeof(tw_stime)); - - /* set up for ROSS stats sampling */ - r->link_traffic_ross_sample = (int64_t*)calloc(p->radix, sizeof(int64_t)); - r->busy_time_ross_sample = (tw_stime*)calloc(p->radix, sizeof(tw_stime)); - if (g_st_model_stats) - lp->model_types->mstat_sz = sizeof(tw_lpid) + (sizeof(int64_t) + sizeof(tw_stime)) * p->radix; - if (g_st_use_analysis_lps && g_st_model_stats) - lp->model_types->sample_struct_sz = sizeof(struct dfly_router_sample) + (sizeof(tw_stime) + sizeof(int64_t)) * p->radix; - r->ross_rsample.busy_time = (tw_stime*)calloc(p->radix, sizeof(tw_stime)); - r->ross_rsample.link_traffic_sample = (int64_t*)calloc(p->radix, sizeof(int64_t)); - - rc_stack_create(&r->st); + int num_to_compare = 2; //TODO make this a configurable + vector< Connection > selected_conns = dfp_select_two_connections(s, bf, msg, lp, conns); - for (int i = 0; i < p->radix; i++) { - // Set credit & router occupancy - r->last_buf_full[i] = 0.0; - r->busy_time[i] = 0.0; - r->busy_time_sample[i] = 0.0; - r->next_output_available_time[i] = 0; - r->last_qos_lvl[i] = 0; - r->link_traffic[i] = 0; - r->link_traffic_sample[i] = 0; - r->queued_count[i] = 0; - r->in_send_loop[i] = 0; - r->vc_occupancy[i] = (int *) calloc(p->num_vcs, sizeof(int)); - r->pending_msgs[i] = - (terminal_plus_message_list **) calloc(p->num_vcs, sizeof(terminal_plus_message_list *)); - r->pending_msgs_tail[i] = - (terminal_plus_message_list **) calloc(p->num_vcs, sizeof(terminal_plus_message_list *)); - r->queued_msgs[i] = - (terminal_plus_message_list **) calloc(p->num_vcs, sizeof(terminal_plus_message_list *)); - r->queued_msgs_tail[i] = - (terminal_plus_message_list **) calloc(p->num_vcs, sizeof(terminal_plus_message_list *)); - - r->qos_status[i] = (int*)calloc(num_qos_levels, sizeof(int)); - r->qos_data[i] = (unsigned long long*)calloc(num_qos_levels, sizeof(unsigned long long)); - - for(int j = 0; j < num_qos_levels; j++) + int scores[num_to_compare]; + int best_score_index = 0; + if (scoring_preference 
== LOWER) { + + int best_score = INT_MAX; + for(int i = 0; i < num_to_compare; i++) { - r->qos_status[i][j] = Q_ACTIVE; - r->qos_data[i][j] = 0; - } - for (int j = 0; j < p->num_vcs; j++) { - r->vc_occupancy[i][j] = 0; - r->pending_msgs[i][j] = NULL; - r->pending_msgs_tail[i][j] = NULL; - r->queued_msgs[i][j] = NULL; - r->queued_msgs_tail[i][j] = NULL; + scores[i] = dfp_score_connection(s, bf, msg, lp, selected_conns[i], C_MIN); + + if (scores[i] < best_score) { + best_score = scores[i]; + best_score_index = i; + } } } + else { + + int best_score = 0; + for(int i = 0; i < num_to_compare; i++) + { + scores[i] = dfp_score_connection(s, bf, msg, lp, selected_conns[i], C_MIN); - r->connMan->solidify_connections(); + if (scores[i] > best_score) { + best_score = scores[i]; + best_score_index = i; + } + } + } - return; -} -int get_vcg_from_category(terminal_plus_message * msg) -{ - if(strcmp(msg->category, "high") == 0) - return Q_HIGH; - else if(strcmp(msg->category, "medium") == 0) - return Q_MEDIUM; - else - tw_error(TW_LOC, "\n priority needs to be specified with qos_levels > 1 %s", msg->category); + return selected_conns[best_score_index]; } -static int get_rtr_bandwidth_consumption(router_state * s, int qos_lvl, int output_port) +int dragonfly_plus_get_router_type(int router_id, const dragonfly_plus_param *p) { - assert(qos_lvl >= Q_HIGH && qos_lvl <= Q_LOW); - assert(output_port < s->params->intra_grp_radix + s->params->num_global_connections + s->params->num_cn); + int num_groups = p->num_groups; + int num_routers = p->num_routers; + int num_router_leaf = p->num_router_leaf; - int bandwidth = s->params->cn_bandwidth; - if(output_port < s->params->intra_grp_radix) - bandwidth = s->params->local_bandwidth; - else if(output_port < s->params->intra_grp_radix + s->params->num_global_connections) - bandwidth = s->params->global_bandwidth; - - /* conversion into bytes/sec from GiB/sec */ - double max_bw = bandwidth * 1024.0 * 1024.0 * 1024.0; - /* conversion into bytes 
per one nanosecs */ - double max_bw_per_ns = max_bw / (1000.0 * 1000.0 * 1000.0); - /* derive maximum bytes that can be transferred during the window */ - double max_bytes_per_win = max_bw_per_ns * bw_reset_window; - - int percent_bw = (((double)s->qos_data[output_port][qos_lvl]) / max_bytes_per_win) * 100; -// printf("\n percent bw consumed by qos_lvl %d is %d bytes transferred %d max_bw %lf ", qos_lvl, percent_bw, s->qos_data[output_port][qos_lvl], max_bw_per_ns); - return percent_bw; + int group_id = router_id / num_groups; + int router_local_id = router_id % num_routers; + if (router_local_id > num_router_leaf) + return SPINE; + else + return LEAF; } -static int get_term_bandwidth_consumption(terminal_state * s, int qos_lvl) +/* get the router id associated with a given terminal id */ +int dragonfly_plus_get_assigned_router_id(int terminal_id, const dragonfly_plus_param *p) { - assert(qos_lvl >= Q_HIGH && qos_lvl <= Q_LOW); + // currently supports symmetrical bipartite spine/leaf router configurations + // first half of routers in a given group are leafs which have terminals + // second half of routers in a given group are spines which have no terminals + int num_groups = p->num_groups; // number of groups of routers in the network + int num_routers = p->num_routers; // num routers per group + int num_router_leaf = p->num_router_leaf; // num leaf routers per group + int num_cn = p->num_cn; // num compute nodes per leaf router + int num_cn_per_group = (num_router_leaf * num_cn); - /* conversion into bytes/sec from GiB/sec */ - double max_bw = s->params->cn_bandwidth * 1024.0 * 1024.0 * 1024.0; - /* conversion into bytes per one nanosecs */ - double max_bw_per_ns = max_bw / (1000.0 * 1000.0 * 1000.0); - /* derive maximum bytes that can be transferred during the window */ - double max_bytes_per_win = max_bw_per_ns * bw_reset_window; - int percent_bw = (((double)s->qos_data[qos_lvl]) / max_bytes_per_win) * 100; -// printf("\n At terminal %lf max bytes %d percent %d 
", max_bytes_per_win, s->qos_data[qos_lvl], percent_bw); - return percent_bw; + int group_id = terminal_id / num_cn_per_group; + int local_router_id = (terminal_id / num_cn) % num_router_leaf; + int router_id = (group_id * num_routers) + local_router_id; + + return router_id; } -void issue_rtr_bw_monitor_event_rc(router_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp) +static void append_to_terminal_plus_message_list(terminal_plus_message_list **thisq, + terminal_plus_message_list **thistail, + int index, + terminal_plus_message_list *msg) { - int radix = s->params->radix; - int num_qos_levels = s->params->num_qos_levels; + if (thisq[index] == NULL) { + thisq[index] = msg; + } + else { + thistail[index]->next = msg; + msg->prev = thistail[index]; + } + thistail[index] = msg; +} - for(int i = 0 ; i < msg->num_cll; i++) - codes_local_latency_reverse(lp); +static void prepend_to_terminal_plus_message_list(terminal_plus_message_list **thisq, + terminal_plus_message_list **thistail, + int index, + terminal_plus_message_list *msg) +{ + if (thisq[index] == NULL) { + thistail[index] = msg; + } + else { + thisq[index]->prev = msg; + msg->next = thisq[index]; + } + thisq[index] = msg; +} - if(msg->rc_is_qos_set == 1) - { - for(int i = 0; i < radix; i++) - { - for(int j = 0; j < num_qos_levels; j++) - { - s->qos_data[i][j] = *(indexer2d(msg->rc_qos_data, i, j, radix, num_qos_levels)); - s->qos_status[i][j] = *(indexer2d(msg->rc_qos_status, i, j, radix, num_qos_levels)); - } +static terminal_plus_message_list *return_head(terminal_plus_message_list **thisq, + terminal_plus_message_list **thistail, + int index) +{ + terminal_plus_message_list *head = thisq[index]; + if (head != NULL) { + thisq[index] = head->next; + if (head->next != NULL) { + head->next->prev = NULL; + head->next = NULL; + } + else { + thistail[index] = NULL; } + } + return head; +} - free(msg->rc_qos_data); - free(msg->rc_qos_status); - msg->rc_is_qos_set = 0; +static terminal_plus_message_list 
*return_tail(terminal_plus_message_list **thisq, + terminal_plus_message_list **thistail, + int index) +{ + terminal_plus_message_list *tail = thistail[index]; + assert(tail); + if (tail->prev != NULL) { + tail->prev->next = NULL; + thistail[index] = tail->prev; + tail->prev = NULL; + } + else { + thistail[index] = NULL; + thisq[index] = NULL; } + return tail; } -void issue_rtr_bw_monitor_event(router_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp) + +void dragonfly_plus_print_params(const dragonfly_plus_param *p, _IO_FILE * st) { - msg->num_cll = 0; - msg->num_rngs = 0; + if (!st) + st = stdout; + + fprintf(st,"\n------------------ Dragonfly Plus Parameters ---------\n"); + fprintf(st,"\tnum_routers = %d\n",p->num_routers); + fprintf(st,"\tlocal_bandwidth = %.2f\n",p->local_bandwidth); + fprintf(st,"\tglobal_bandwidth = %.2f\n",p->global_bandwidth); + fprintf(st,"\tcn_bandwidth = %.2f\n",p->cn_bandwidth); + fprintf(st,"\tnum_vcs = %d\n",p->num_vcs); + fprintf(st,"\tlocal_vc_size = %d\n",p->local_vc_size); + fprintf(st,"\tglobal_vc_size = %d\n",p->global_vc_size); + fprintf(st,"\tcn_vc_size = %d\n",p->cn_vc_size); + fprintf(st,"\tchunk_size = %d\n",p->chunk_size); + fprintf(st,"\tnum_cn = %d\n",p->num_cn); + fprintf(st,"\tintra_grp_radix = %d\n",p->intra_grp_radix); + fprintf(st,"\tnum_qos_levels = %d\n",p->num_qos_levels); + fprintf(st,"\tnum_router_spine = %d\n",p->num_router_spine); + fprintf(st,"\tnum_router_leaf = %d\n",p->num_router_leaf); + fprintf(st,"\tmax_port_score = %ld\n",p->max_port_score); + fprintf(st,"\tnum_groups = %d\n",p->num_groups); + fprintf(st,"\tvirtual radix = %d\n",p->radix); + fprintf(st,"\ttotal_routers = %d\n",p->total_routers); + fprintf(st,"\ttotal_terminals = %d\n",p->total_terminals); + fprintf(st,"\tnum_global_connections = %d\n",p->num_global_connections); + fprintf(st,"\tcn_delay = %.2f\n",p->cn_delay); + fprintf(st,"\tlocal_delay = %.2f\n",p->local_delay); + fprintf(st,"\tglobal_delay = %.2f\n",p->global_delay); + 
fprintf(st,"\tlocal credit_delay = %.2f\n",p->local_credit_delay); + fprintf(st,"\tglobal credit_delay = %.2f\n",p->global_credit_delay); + fprintf(st,"\tcn credit_delay = %.2f\n",p->cn_credit_delay); + fprintf(st,"\trouter_delay = %.2f\n",p->router_delay); + fprintf(st,"\tscoring = %d\n",scoring); + fprintf(st,"\tadaptive_threshold = %d\n",p->adaptive_threshold); + fprintf(st,"\trouting = %s\n",get_routing_alg_chararray(routing)); + fprintf(st,"\tsource_leaf_consider_nonmin = %s\n", (p->source_leaf_consider_nonmin ? "true" : "false")); + fprintf(st,"\tint_spine_consider_min = %s\n", (p->int_spine_consider_min ? "true" : "false")); + fprintf(st,"\tdest_spine_consider_nonmin = %s\n", (p->dest_spine_consider_nonmin ? "true" : "false")); + fprintf(st,"\tdest_spine_consider_gnonmin = %s\n", (p->dest_spine_consider_global_nonmin ? "true" : "false")); + fprintf(st,"\tmax hops notification = %d\n",p->max_hops_notify); + fprintf(st,"------------------------------------------------------\n\n"); +} - int radix = s->params->radix; - int num_qos_levels = s->params->num_qos_levels; - +static void dragonfly_read_config(const char *anno, dragonfly_plus_param *params) +{ + /*Adding init for router magic number*/ + uint32_t h1 = 0, h2 = 0; + bj_hashlittle2(LP_METHOD_NM_ROUT, strlen(LP_METHOD_NM_ROUT), &h1, &h2); + router_magic_num = h1 + h2; - //RC data storage start. - //Allocate memory here for these pointers that are stored in the events. FREE THESE IN RC OR IN COMMIT_F - msg->rc_qos_data = (unsigned long long *) calloc(radix * num_qos_levels, sizeof(unsigned long long)); - msg->rc_qos_status = (int *) calloc(radix * num_qos_levels, sizeof(int)); + bj_hashlittle2(LP_METHOD_NM_TERM, strlen(LP_METHOD_NM_TERM), &h1, &h2); + terminal_magic_num = h1 + h2; - //store qos data and status into the arrays. Pointers to the arrays are stored in events. 
- for(int i = 0; i < radix; i++) - { - for(int j = 0; j < num_qos_levels; j++) - { - *(indexer2d(msg->rc_qos_data, i, j, radix, num_qos_levels)) = s->qos_data[i][j]; - *(indexer2d(msg->rc_qos_status, i, j, radix, num_qos_levels)) = s->qos_status[i][j]; - } + // shorthand + dragonfly_plus_param *p = params; + int myRank; + MPI_Comm_rank(MPI_COMM_CODES, &myRank); + + int rc = configuration_get_value_int(&config, "PARAMS", "local_vc_size", anno, &p->local_vc_size); + if (rc) { + p->local_vc_size = 1024; + if(!myRank) + fprintf(stderr, "Buffer size of local channels not specified, setting to %d\n", p->local_vc_size); } - msg->rc_is_qos_set = 1; - //RC data storage end. + rc = configuration_get_value_int(&config, "PARAMS", "global_vc_size", anno, &p->global_vc_size); + if (rc) { + p->global_vc_size = 2048; + if(!myRank) + fprintf(stderr, "Buffer size of global channels not specified, setting to %d\n", p->global_vc_size); + } + + rc = configuration_get_value_int(&config, "PARAMS", "num_qos_levels", anno, &p->num_qos_levels); + if(rc) { + p->num_qos_levels = 1; + if(!myRank) + fprintf(stderr, "Number of QOS levels not specified, setting to %d\n", p->num_qos_levels); + } + + char qos_levels_str[MAX_NAME_LENGTH]; + rc = configuration_get_value(&config, "PARAMS", "qos_bandwidth", anno, qos_levels_str, MAX_NAME_LENGTH); + p->qos_bandwidths = (int*)calloc(p->num_qos_levels, sizeof(int)); - for(int i = 0; i < radix; i++) + if(p->num_qos_levels > 1) { - for(int j = 0; j < num_qos_levels; j++) + int total_bw = 0; + char * token; + token = strtok(qos_levels_str, ","); + int i = 0; + while(token != NULL) { - int bw_consumed = get_rtr_bandwidth_consumption(s, j, i); - - #if DEBUG_QOS == 1 - if(dragonfly_rtr_bw_log != NULL) + sscanf(token, "%d", &p->qos_bandwidths[i]); + total_bw += p->qos_bandwidths[i]; + if(p->qos_bandwidths[i] <= 0) { - if(s->qos_data[j][k] > 0) - { - fprintf(dragonfly_rtr_bw_log, "\n %d %f %d %d %d %d %d %f", s->router_id, tw_now(lp), i, j, bw_consumed, 
s->qos_status[i][j], s->qos_data[i][j], s->busy_time_sample[i]); - } + tw_error(TW_LOC, "\nInvalid bandwidth levels\n"); } - #endif + i++; + token = strtok(NULL,","); } + assert(total_bw <= 100); } + else + p->qos_bandwidths[0] = 100; - /* Reset the qos status and bandwidth consumption. */ - for(int i = 0; i < s->params->radix; i++) - { - for(int j = 0; j < num_qos_levels; j++) - { - s->qos_status[i][j] = Q_ACTIVE; - s->qos_data[i][j] = 0; - } - s->busy_time_sample[i] = 0; - s->ross_rsample.busy_time[i] = 0; - } + rc = configuration_get_value_double(&config, "PARAMS", "max_qos_monitor", anno, &max_qos_monitor); + if(rc) { + if(!myRank) + fprintf(stderr, "Setting max_qos_monitor to %lf\n", max_qos_monitor); + } - if(tw_now(lp) > max_qos_monitor) - return; - - msg->num_cll++; - tw_stime bw_ts = bw_reset_window + codes_local_latency(lp); - terminal_plus_message *m; - tw_event * e = model_net_method_event_new(lp->gid, bw_ts, lp, - DRAGONFLY_PLUS_ROUTER, (void**)&m, NULL); - m->type = R_BANDWIDTH; - m->magic = router_magic_num; - tw_event_send(e); -} + rc = configuration_get_value_int(&config, "PARAMS", "cn_vc_size", anno, &p->cn_vc_size); + if (rc) { + p->cn_vc_size = 1024; + if(!myRank) + fprintf(stderr, "Buffer size of compute node channels not specified, setting to %d\n", p->cn_vc_size); + } -void issue_bw_monitor_event_rc(terminal_state * s, tw_bf * bf, terminal_plus_message * msg, tw_lp * lp) -{ - for(int i = 0 ; i < msg->num_cll; i++) - codes_local_latency_reverse(lp); - - int num_qos_levels = s->params->num_qos_levels; - - if(msg->rc_is_qos_set == 1) - { - for(int i = 0; i < num_qos_levels; i++) - { - s->qos_data[i] = msg->rc_qos_data[i]; - s->qos_status[i] = msg->rc_qos_status[i]; - } + rc = configuration_get_value_int(&config, "PARAMS", "chunk_size", anno, &p->chunk_size); + if (rc) { + p->chunk_size = 512; + if(!myRank) + fprintf(stderr, "Chunk size for packets is specified, setting to %d\n", p->chunk_size); + } - free(msg->rc_qos_data); - 
free(msg->rc_qos_status); - msg->rc_is_qos_set = 0; + rc = configuration_get_value_double(&config, "PARAMS", "local_bandwidth", anno, &p->local_bandwidth); + if (rc) { + p->local_bandwidth = 5.25; + if(!myRank) + fprintf(stderr, "Bandwidth of local channels not specified, setting to %lf\n", p->local_bandwidth); } - -} -/* resets the bandwidth numbers recorded so far */ -void issue_bw_monitor_event(terminal_state * s, tw_bf * bf, terminal_plus_message * msg, tw_lp * lp) -{ - - msg->num_cll = 0; - msg->num_rngs = 0; - int num_qos_levels = s->params->num_qos_levels; - - //RC data storage start. - //Allocate memory here for these pointers that are stored in the events. FREE THESE IN RC OR IN COMMIT_F - msg->rc_qos_data = (unsigned long long *) calloc(num_qos_levels, sizeof(unsigned long long)); - msg->rc_qos_status = (int *) calloc(num_qos_levels, sizeof(int)); - //store qos data and status into the arrays. Pointers to the arrays are stored in events. - for(int i = 0; i < num_qos_levels; i++) - { - msg->rc_qos_data[i] = s->qos_data[i]; - msg->rc_qos_status[i] = s->qos_status[i]; + rc = configuration_get_value_double(&config, "PARAMS", "global_bandwidth", anno, &p->global_bandwidth); + if (rc) { + p->global_bandwidth = 4.7; + if(!myRank) + fprintf(stderr, "Bandwidth of global channels not specified, setting to %lf\n", p->global_bandwidth); } - msg->rc_is_qos_set = 1; - //RC data storage end. - /* Reset the qos status and bandwidth consumption. 
*/ - for(int i = 0; i < num_qos_levels; i++) - { - s->qos_status[i] = Q_ACTIVE; - s->qos_data[i] = 0; + rc = configuration_get_value_double(&config, "PARAMS", "cn_bandwidth", anno, &p->cn_bandwidth); + if (rc) { + p->cn_bandwidth = 5.25; + if(!myRank) + fprintf(stderr, "Bandwidth of compute node channels not specified, setting to %lf\n", p->cn_bandwidth); } - - - if(tw_now(lp) > max_qos_monitor) - return; - - msg->num_cll++; - terminal_plus_message * m; - tw_stime bw_ts = bw_reset_window + codes_local_latency(lp); - tw_event * e = model_net_method_event_new(lp->gid, bw_ts, lp, DRAGONFLY_PLUS, - (void**)&m, NULL); - m->type = T_BANDWIDTH; - m->magic = terminal_magic_num; - tw_event_send(e); -} -static int get_next_vcg(terminal_state * s, tw_bf * bf, terminal_plus_message * msg, tw_lp * lp) -{ - int num_qos_levels = s->params->num_qos_levels; - - if(num_qos_levels == 1) - { - if(s->terminal_msgs[0] == NULL || ((s->vc_occupancy[0] + s->params->chunk_size) > s->params->cn_vc_size)) - return -1; - else - return 0; + rc = configuration_get_value_double(&config, "PARAMS", "router_delay", anno, &p->router_delay); + if (rc) { + p->router_delay = 100; } - int bw_consumption[num_qos_levels]; + configuration_get_value(&config, "PARAMS", "cn_sample_file", anno, cn_sample_file, MAX_NAME_LENGTH); + configuration_get_value(&config, "PARAMS", "rt_sample_file", anno, router_sample_file, MAX_NAME_LENGTH); - /* First make sure the bandwidth consumptions are up to date. 
*/ - for(int k = 0; k < num_qos_levels; k++) - { - if(s->qos_status[k] != Q_OVERBW) - { - bw_consumption[k] = get_term_bandwidth_consumption(s, k); - if(bw_consumption[k] > s->params->qos_bandwidths[k]) - { - if(k == 0) - msg->qos_reset1 = 1; - else if(k == 1) - msg->qos_reset2 = 1; - - s->qos_status[k] = Q_OVERBW; - } - } - } - if(BW_MONITOR == 1) - { - for(int i = 0; i < num_qos_levels; i++) - { - if(s->qos_status[i] == Q_ACTIVE) - { - if(s->terminal_msgs[i] != NULL && ((s->vc_occupancy[i] + s->params->chunk_size) <= s->params->cn_vc_size)) - return i; - } - } + char routing_str[MAX_NAME_LENGTH]; + configuration_get_value(&config, "PARAMS", "routing", anno, routing_str, MAX_NAME_LENGTH); + if (strcmp(routing_str, "minimal") == 0) + routing = MINIMAL; + else if (strcmp(routing_str, "non-minimal-spine") == 0) + routing = NON_MINIMAL_SPINE; + else if (strcmp(routing_str, "non-minimal-leaf") == 0) + routing = NON_MINIMAL_LEAF; + else if (strcmp(routing_str, "prog-adaptive") == 0) + routing = PROG_ADAPTIVE; + else if (strcmp(routing_str, "fully-prog-adaptive") == 0) + routing = FULLY_PROG_ADAPTIVE; + else { + if(!myRank) + fprintf(stderr, "No routing protocol specified, setting to minimal routing\n"); + routing = MINIMAL; } - int next_rr_vcg = (s->last_qos_lvl + 1) % num_qos_levels; - /* All vcgs are exceeding their bandwidth limits*/ - for(int i = 0; i < num_qos_levels; i++) - { - if(s->terminal_msgs[i] != NULL && ((s->vc_occupancy[i] + s->params->chunk_size) <= s->params->cn_vc_size)) - { - bf->c2 = 1; - - if(msg->last_saved_qos < 0) - msg->last_saved_qos = s->last_qos_lvl; - - s->last_qos_lvl = next_rr_vcg; - return i; - } - next_rr_vcg = (next_rr_vcg + 1) % num_qos_levels; + rc = configuration_get_value_int(&config, "PARAMS", "notification_on_hops_greater_than", anno, &p->max_hops_notify); + if (rc) { + if(!myRank) + fprintf(stderr, "Maximum hops for notifying not specified, setting to INT MAX\n"); + p->max_hops_notify = INT_MAX; } - return -1; -} - -/* MM: These 
packet events (packet_send, packet_receive etc.) will be used as is, basically, the routing - * functions will be changed only. */ -/* dragonfly packet event , generates a dragonfly packet on the compute node */ -static tw_stime dragonfly_plus_packet_event(model_net_request const *req, - uint64_t message_offset, - uint64_t packet_size, - tw_stime offset, - mn_sched_params const *sched_params, - void const *remote_event, - void const *self_event, - tw_lp *sender, - int is_last_pckt) -{ - (void) message_offset; - (void) sched_params; - tw_event *e_new; - tw_stime xfer_to_nic_time; - terminal_plus_message *msg; - char *tmp_ptr; - - xfer_to_nic_time = codes_local_latency(sender); - // e_new = tw_event_new(sender->gid, xfer_to_nic_time+offset, sender); - // msg = tw_event_data(e_new); - e_new = model_net_method_event_new(sender->gid, xfer_to_nic_time + offset, sender, DRAGONFLY_PLUS, - (void **) &msg, (void **) &tmp_ptr); - strcpy(msg->category, req->category); - msg->final_dest_gid = req->final_dest_lp; - msg->total_size = req->msg_size; - msg->sender_lp = req->src_lp; - msg->sender_mn_lp = sender->gid; - msg->packet_size = packet_size; - msg->travel_start_time = tw_now(sender); - msg->remote_event_size_bytes = 0; - msg->local_event_size_bytes = 0; - msg->type = T_GENERATE; - msg->dest_terminal_id = req->dest_mn_lp; - msg->dfp_dest_terminal_id = codes_mapping_get_lp_relative_id(msg->dest_terminal_id,0,0); - msg->message_id = req->msg_id; - msg->is_pull = req->is_pull; - msg->pull_size = req->pull_size; - msg->magic = terminal_magic_num; - msg->msg_start_time = req->msg_start_time; - if (is_last_pckt) /* Its the last packet so pass in remote and local event information*/ - { - if (req->remote_event_size > 0) { - msg->remote_event_size_bytes = req->remote_event_size; - memcpy(tmp_ptr, remote_event, req->remote_event_size); - tmp_ptr += req->remote_event_size; - } - if (req->self_event_size > 0) { - msg->local_event_size_bytes = req->self_event_size; - memcpy(tmp_ptr, 
self_event, req->self_event_size); - tmp_ptr += req->self_event_size; - } + int src_leaf_cons_choice; + rc = configuration_get_value_int(&config, "PARAMS", "source_leaf_consider_nonmin", anno, &src_leaf_cons_choice); + if (rc) { + // fprintf(stderr, "Source leaf consideration of nonmin ports not specified. Defaulting to True\n"); + p->source_leaf_consider_nonmin = true; } - // printf("\n dragonfly remote event %d local event %d last packet %d %lf ", msg->remote_event_size_bytes, - // msg->local_event_size_bytes, is_last_pckt, xfer_to_nic_time); - tw_event_send(e_new); - return xfer_to_nic_time; -} - -/* dragonfly packet event reverse handler */ -static void dragonfly_plus_packet_event_rc(tw_lp *sender) -{ - codes_local_latency_reverse(sender); - return; -} - -/*MM: This will also be used as is. This is meant to sent a credit back to the - * sending router. */ -/*When a packet is sent from the current router and a buffer slot becomes available, a credit is sent back to - * schedule another packet event*/ -static void router_credit_send(router_state *s, terminal_plus_message *msg, tw_lp *lp, int sq, short* rng_counter) -{ - tw_event *buf_e; - tw_stime ts; - terminal_plus_message *buf_msg; - - int dest = 0, type = R_BUFFER; - int is_terminal = 0; + else if (src_leaf_cons_choice == 1) { + p->source_leaf_consider_nonmin = true; + } + else + p->source_leaf_consider_nonmin = false; - const dragonfly_plus_param *p = s->params; - // Notify sender terminal about available buffer space - if (msg->last_hop == TERMINAL) { - dest = msg->src_terminal_id; - type = T_BUFFER; - is_terminal = 1; + int int_spn_cons_choice; + rc = configuration_get_value_int(&config, "PARAMS", "int_spine_consider_min", anno, &int_spn_cons_choice); + if (rc) { + // fprintf(stderr, "Int spine consideration of min ports not specified. 
Defaulting to False\n"); + p->int_spine_consider_min = false; } - else if (msg->last_hop == GLOBAL || msg->last_hop == LOCAL) { - dest = msg->intm_lp_id; + else if (int_spn_cons_choice == 1) { + p->int_spine_consider_min = true; } else - printf("\n Invalid message type"); - - (*rng_counter)++; - ts = g_tw_lookahead + p->credit_delay + tw_rand_unif(lp->rng); + p->int_spine_consider_min = false; - if (is_terminal) { - buf_e = model_net_method_event_new(dest, ts, lp, DRAGONFLY_PLUS, (void **) &buf_msg, NULL); - buf_msg->magic = terminal_magic_num; + int dst_spn_cons_choice; + rc = configuration_get_value_int(&config, "PARAMS", "dest_spine_consider_nonmin", anno, &dst_spn_cons_choice); + if (rc) { + // fprintf(stderr, "Dest spine consideration of nonmin ports not specified. Defaulting to False\n"); + p->dest_spine_consider_nonmin = false; } - else { - buf_e = model_net_method_event_new(dest, ts, lp, DRAGONFLY_PLUS_ROUTER, (void **) &buf_msg, NULL); - buf_msg->magic = router_magic_num; + else if (dst_spn_cons_choice == 1) { + p->dest_spine_consider_nonmin = true; } + else + p->dest_spine_consider_nonmin = false; - buf_msg->origin_router_id = s->router_id; - if (sq == -1) { - buf_msg->vc_index = msg->vc_index; - buf_msg->output_chan = msg->output_chan; + + int dst_spn_gcons_choice; + rc = configuration_get_value_int(&config, "PARAMS", "dest_spine_consider_global_nonmin", anno, &dst_spn_gcons_choice); + if (rc) { + // fprintf(stderr, "Dest spine consideration of global nonmin ports not specified. 
Defaulting to True\n"); + p->dest_spine_consider_global_nonmin = true; } - else { - buf_msg->vc_index = msg->saved_vc; - buf_msg->output_chan = msg->saved_channel; + else if (dst_spn_gcons_choice == 1) { + p->dest_spine_consider_global_nonmin = true; } + else + p->dest_spine_consider_global_nonmin = false; - strcpy(buf_msg->category, msg->category); - buf_msg->type = type; - tw_event_send(buf_e); - return; -} + /* MM: This should be 2 for dragonfly plus*/ + p->num_vcs = 2; + + if(p->num_qos_levels > 1) + p->num_vcs = p->num_qos_levels * p->num_vcs; -static void packet_generate_rc(terminal_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp) -{ - s->packet_gen--; - packet_gen--; - s->packet_counter--; - - for(int i = 0; i < msg->num_rngs; i++) - tw_rand_reverse_unif(lp->rng); + rc = configuration_get_value_int(&config, "PARAMS", "num_groups", anno, &p->num_groups); + if (rc) { + tw_error(TW_LOC, "\nnum_groups not specified, Aborting\n"); + } + rc = configuration_get_value_int(&config, "PARAMS", "num_router_spine", anno, &p->num_router_spine); + if (rc) { + tw_error(TW_LOC, "\nnum_router_spine not specified, Aborting\n"); + } + rc = configuration_get_value_int(&config, "PARAMS", "num_router_leaf", anno, &p->num_router_leaf); + if (rc) { + tw_error(TW_LOC, "\nnum_router_leaf not specified, Aborting\n"); + } - for(int i = 0; i < msg->num_cll; i++) - codes_local_latency_reverse(lp); + p->num_routers = p->num_router_spine + p->num_router_leaf; // num routers per group + p->intra_grp_radix = max(p->num_router_spine, p->num_router_leaf); //TODO: Is this sufficient? If there are parallel intra connecitons, this will break. 
- int num_qos_levels = s->params->num_qos_levels; - if(bf->c1) - s->is_monitoring_bw = 0; + rc = configuration_get_value_int(&config, "PARAMS", "num_cns_per_router", anno, &p->num_cn); + if (rc) { + if(!myRank) + fprintf(stderr,"Number of cns per router not specified, setting to %d\n", 4); + p->num_cn = 4; + } - int num_chunks = msg->packet_size / s->params->chunk_size; - if (msg->packet_size < s->params->chunk_size) - num_chunks++; + rc = configuration_get_value_int(&config, "PARAMS", "num_global_connections", anno, &p->num_global_connections); + if (rc) { + tw_error(TW_LOC, "\nnum_global_connections per router not specified, abortin..."); + } + p->radix = p->intra_grp_radix + p->num_global_connections + + p->num_cn; // TODO this may not be sufficient, radix isn't same for leaf and spine routers + p->total_routers = p->num_groups * p->num_routers; + p->total_terminals = (p->num_groups * p->num_router_leaf) * p->num_cn; - - int vcg = 0; - if(num_qos_levels > 1) - { - vcg = get_vcg_from_category(msg); - assert(vcg == Q_HIGH || vcg == Q_MEDIUM); + char scoring_str[MAX_NAME_LENGTH]; + configuration_get_value(&config, "PARAMS", "route_scoring_metric", anno, scoring_str, MAX_NAME_LENGTH); + if (strcmp(scoring_str, "alpha") == 0) { + scoring = ALPHA; + scoring_preference = LOWER; } - assert(vcg < num_qos_levels); - - int i; - for (i = 0; i < num_chunks; i++) { - delete_terminal_plus_message_list(return_tail(s->terminal_msgs, s->terminal_msgs_tail, vcg)); - s->terminal_length[vcg] -= s->params->chunk_size; + else if (strcmp(scoring_str, "beta") == 0) { + scoring = BETA; + scoring_preference = LOWER; } - if (bf->c5) { - s->in_send_loop = 0; + else if (strcmp(scoring_str, "gamma") == 0) { + tw_error(TW_LOC, "Gamma scoring protocol currently non-functional"); //TODO: Fix gamma scoring protocol + scoring = GAMMA; + scoring_preference = HIGHER; } - if (bf->c11) { - s->issueIdle = 0; + else if (strcmp(scoring_str, "delta") == 0) { + scoring = DELTA; + scoring_preference = 
LOWER; + } + else { + if(!myRank) + fprintf(stderr, "No route scoring protocol specified, setting to alpha scoring\n"); + scoring = ALPHA; + scoring_preference = LOWER; } - struct mn_stats *stat; - stat = model_net_find_stats(msg->category, s->dragonfly_stats_array); - stat->send_count--; - stat->send_bytes -= msg->packet_size; - stat->send_time -= (1 / s->params->cn_bandwidth) * msg->packet_size; -} -/* generates packet at the current dragonfly compute node */ -static void packet_generate(terminal_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp) -{ - msg->num_rngs = 0; - msg->num_cll = 0; - - packet_gen++; - s->packet_gen++; - - int num_qos_levels = s->params->num_qos_levels; - int vcg = 0; //by default there's only one VC for terminals, VC0. There can be more based on the number of QoS levels + rc = configuration_get_value_int(&config, "PARAMS", "adaptive_threshold", anno, &p->adaptive_threshold); + if (rc) { + if(!myRank) + fprintf(stderr, "Adaptive Minimal Routing Threshold not specified: setting to default = 0. 
(Will consider minimal and nonminimal routes based on scoring metric alone)\n"); + p->adaptive_threshold = 0; + } - if (num_qos_levels > 1) { - tw_lpid router_id; - codes_mapping_get_lp_info(lp->gid, lp_group_name, &mapping_grp_id, NULL, &mapping_type_id, NULL, &mapping_rep_id, &mapping_offset); - codes_mapping_get_lp_id(lp_group_name, LP_CONFIG_NM_ROUT, NULL, 0, s->router_id / num_routers_per_mgrp, s->router_id % num_routers_per_mgrp, &router_id); - if (s->is_monitoring_bw == 0) { - bf->c1 = 1; - /* Issue an event on both terminal and router to monitor bandwidth */ - msg->num_cll++; - tw_stime bw_ts = bw_reset_window + codes_local_latency(lp); - terminal_plus_message * m; - tw_event * e = model_net_method_event_new(lp->gid, bw_ts, lp, DRAGONFLY_PLUS, (void**)&m, NULL); - m->type = T_BANDWIDTH; - m->magic = terminal_magic_num; - s->is_monitoring_bw = 1; - tw_event_send(e); - } + int largest_vc_size = 0; + if (p->local_vc_size > largest_vc_size) + largest_vc_size = p->local_vc_size; + if (p->global_vc_size > largest_vc_size) + largest_vc_size = p->global_vc_size; + if (p->cn_vc_size > largest_vc_size) + largest_vc_size = p->cn_vc_size; - vcg = get_vcg_from_category(msg); - assert(vcg == Q_HIGH || vcg == Q_MEDIUM); + p->max_port_score = (p->num_vcs * largest_vc_size) + largest_vc_size; //The maximum score that a port can get during the scoring metrics. + + // read intra group connections, store from a router's perspective + // all links to the same router form a vector + char intraFile[MAX_NAME_LENGTH]; + configuration_get_value(&config, "PARAMS", "intra-group-connections", anno, intraFile, MAX_NAME_LENGTH); + if (strlen(intraFile) <= 0) { + tw_error(TW_LOC, "\nIntra group connections file not specified. 
Aborting\n"); } - assert(vcg < num_qos_levels); - tw_stime ts, nic_ts; + //setup Connection Managers for each router + for(int i = 0; i < p->total_routers; i++) + { + int src_id_global = i; + int src_id_local = i % p->num_routers; + int src_group = i / p->num_routers; - assert(lp->gid != msg->dest_terminal_id); - const dragonfly_plus_param *p = s->params; + ConnectionManager conman = ConnectionManager(src_id_local, src_id_global, src_group, p->intra_grp_radix, p->num_global_connections, p->num_cn, p->num_routers); + connManagerList.push_back(conman); + } - int total_event_size; - uint64_t num_chunks = msg->packet_size / p->chunk_size; - double cn_delay = s->params->cn_delay; + FILE *groupFile = fopen(intraFile, "rb"); + if (!groupFile) + tw_error(TW_LOC, "\nintra-group file not found\n"); - int dest_router_id = dragonfly_plus_get_assigned_router_id(msg->dfp_dest_terminal_id, s->params); - int dest_grp_id = dest_router_id / s->params->num_routers; - int src_grp_id = s->router_id / s->params->num_routers; + IntraGroupLink newLink; + while (fread(&newLink, sizeof(IntraGroupLink), 1, groupFile) != 0) { + int src_id_local = newLink.src; + int dest_id_local = newLink.dest; + for(int i = 0; i < p->total_routers; i++) + { + int group_id = i/p->num_routers; + if (i % p->num_routers == src_id_local) + { + int dest_id_global = group_id * p->num_routers + dest_id_local; + connManagerList[i].add_connection(dest_id_global, CONN_LOCAL); + } + } + } + fclose(groupFile); - if(src_grp_id == dest_grp_id) + //terminal assignment! 
+ for(int i = 0; i < p->total_terminals; i++) { - if(dest_router_id == s->router_id) - //TODO: add RC stuff like in dragonfly-custom.C - num_local_packets_sr++; - else - num_local_packets_sg++; + int assigned_router_id = dragonfly_plus_get_assigned_router_id(i, p); + int assigned_group_id = assigned_router_id / p->num_routers; + connManagerList[assigned_router_id].add_connection(i, CONN_TERMINAL); } - else - num_remote_packets++; - if (msg->packet_size < s->params->chunk_size) - num_chunks++; + // read inter group connections, store from a router's perspective + // also create a group level table that tells all the connecting routers + char interFile[MAX_NAME_LENGTH]; + configuration_get_value(&config, "PARAMS", "inter-group-connections", anno, interFile, MAX_NAME_LENGTH); + if (strlen(interFile) <= 0) { + tw_error(TW_LOC, "\nInter group connections file not specified. Aborting\n"); + } + FILE *systemFile = fopen(interFile, "rb"); + if (!myRank) { + printf("Reading inter-group connectivity file: %s\n", interFile); + printf("\nTotal routers: %d; total groups: %d \n", p->total_routers, p->num_groups); + } - if (msg->packet_size < s->params->chunk_size) - cn_delay = bytes_to_ns(msg->packet_size % s->params->chunk_size, s->params->cn_bandwidth); + connectionList.resize(p->num_groups); + for (int g = 0; g < connectionList.size(); g++) { + connectionList[g].resize(p->num_groups); + } - msg->num_rngs++; - nic_ts = g_tw_lookahead + (num_chunks * cn_delay) + tw_rand_unif(lp->rng); + InterGroupLink newInterLink; + while (fread(&newInterLink, sizeof(InterGroupLink), 1, systemFile) != 0) { + int src_id_global = newInterLink.src; + int src_group_id = src_id_global / p->num_routers; + int dest_id_global = newInterLink.dest; + int dest_group_id = dest_id_global / p->num_routers; - // msg->packet_ID = lp->gid + g_tw_nlp * s->packet_counter; - msg->packet_ID = s->packet_counter; - s->packet_counter++; - msg->my_N_hop = 0; - msg->my_l_hop = 0; - msg->my_g_hop = 0; + // printf("[%d 
-> %d]\n",src_id_global, dest_id_global); + connManagerList[src_id_global].add_connection(dest_id_global, CONN_GLOBAL); - // if(msg->dest_terminal_id == TRACK) - if (msg->packet_ID == LLU(TRACK_PKT) && lp->gid == T_ID) - printf("\n Packet %llu generated at terminal %d dest %llu size %llu num chunks %llu ", msg->packet_ID, - s->terminal_id, LLU(msg->dest_terminal_id), LLU(msg->packet_size), LLU(num_chunks)); + int r; + for (r = 0; r < connectionList[src_group_id][dest_group_id].size(); r++) { + if (connectionList[src_group_id][dest_group_id][r] == newInterLink.src) + break; + } + if (r == connectionList[src_group_id][dest_group_id].size()) { + connectionList[src_group_id][dest_group_id].push_back(newInterLink.src); + } + } - for (int i = 0; i < num_chunks; i++) { - terminal_plus_message_list *cur_chunk = - (terminal_plus_message_list *) calloc(1, sizeof(terminal_plus_message_list)); - msg->origin_router_id = s->router_id; - init_terminal_plus_message_list(cur_chunk, msg); - - if (msg->remote_event_size_bytes + msg->local_event_size_bytes > 0) { - cur_chunk->event_data = - (char *) calloc(1, msg->remote_event_size_bytes + msg->local_event_size_bytes); - } - - void *m_data_src = model_net_method_get_edata(DRAGONFLY_PLUS, msg); - if (msg->remote_event_size_bytes) { - memcpy(cur_chunk->event_data, m_data_src, msg->remote_event_size_bytes); - } - if (msg->local_event_size_bytes) { - m_data_src = (char *) m_data_src + msg->remote_event_size_bytes; - memcpy((char *) cur_chunk->event_data + msg->remote_event_size_bytes, m_data_src, - msg->local_event_size_bytes); + if (DUMP_CONNECTIONS) + { + if (!myRank) { + for(int i=0; i < connManagerList.size(); i++) + { + connManagerList[i].print_connections(); + } } - - cur_chunk->msg.output_chan = vcg; //By default is 0 but QoS can mean more than just a single VC for terminals - cur_chunk->msg.chunk_id = i; - cur_chunk->msg.origin_router_id = s->router_id; - append_to_terminal_plus_message_list(s->terminal_msgs, 
s->terminal_msgs_tail, vcg, cur_chunk); - s->terminal_length[vcg] += s->params->chunk_size; } - if (s->terminal_length[vcg] < s->params->cn_vc_size) { - model_net_method_idle_event(nic_ts, 0, lp); + for(int i = 0; i < p->total_routers; i++){ + int loc_id = i % p->num_routers; + if (loc_id < p->num_router_leaf) + router_type_map[i] = LEAF; + else + router_type_map[i] = SPINE; } - else { - bf->c11 = 1; - s->issueIdle = 1; + + if (!myRank) { + printf("\nTotal nodes: %d, Total routers: %d, Num groups: %d, Routers per group: %d, Virtual radix: %d\n", + p->num_cn * p->num_router_leaf * p->num_groups, p->total_routers, p->num_groups, p->num_routers, p->radix); } - if (s->in_send_loop == 0) { - bf->c5 = 1; - msg->num_cll++; - ts = codes_local_latency(lp); - terminal_plus_message *m; - tw_event *e = model_net_method_event_new(lp->gid, ts, lp, DRAGONFLY_PLUS, (void **) &m, NULL); - m->type = T_SEND; - m->magic = terminal_magic_num; - s->in_send_loop = 1; - tw_event_send(e); + rc = configuration_get_value_double(&config, "PARAMS", "cn_delay", anno, &p->cn_delay); + if (rc) { + p->cn_delay = bytes_to_ns(p->chunk_size, p->cn_bandwidth); + if(!myRank) + fprintf(stderr, "cn_delay not specified, using default calculation: %.2f\n", p->cn_delay); } - total_event_size = - model_net_get_msg_sz(DRAGONFLY_PLUS) + msg->remote_event_size_bytes + msg->local_event_size_bytes; - mn_stats *stat; - stat = model_net_find_stats(msg->category, s->dragonfly_stats_array); - stat->send_count++; - stat->send_bytes += msg->packet_size; - stat->send_time += (1 / p->cn_bandwidth) * msg->packet_size; - if (stat->max_event_size < total_event_size) - stat->max_event_size = total_event_size; + rc = configuration_get_value_double(&config, "PARAMS", "local_delay", anno, &p->local_delay); + if (rc) { + p->local_delay = bytes_to_ns(p->chunk_size, p->local_bandwidth); + if(!myRank) + fprintf(stderr, "local_delay not specified, using default calculation: %.2f\n", p->local_delay); + } + rc = 
configuration_get_value_double(&config, "PARAMS", "global_delay", anno, &p->global_delay); + if (rc) { + p->global_delay = bytes_to_ns(p->chunk_size, p->global_bandwidth); + if(!myRank) + fprintf(stderr, "global_delay not specified, using default calculation: %.2f\n", p->global_delay); + } - return; -} -static void packet_send_rc(terminal_state * s, tw_bf * bf, terminal_plus_message * msg, tw_lp * lp) -{ - int num_qos_levels = s->params->num_qos_levels; + //CREDIT DELAY CONFIGURATION LOGIC ------------ + rc = configuration_get_value_int(&config, "PARAMS", "credit_size", anno, &p->credit_size); + if (rc) { + p->credit_size = 8; + if(!myRank) + fprintf(stderr, "credit_size not specified, using default: %d\n", p->credit_size); + } - if(msg->qos_reset1) - s->qos_status[0] = Q_ACTIVE; - if(msg->qos_reset2) - s->qos_status[1] = Q_ACTIVE; - - if(msg->last_saved_qos) - s->last_qos_lvl = msg->last_saved_qos; + double general_credit_delay; + int credit_delay_unset = configuration_get_value_double(&config, "PARAMS", "credit_delay", anno, &general_credit_delay); + int local_credit_delay_unset = configuration_get_value_double(&config, "PARAMS", "local_credit_delay", anno, &p->local_credit_delay); + int global_credit_delay_unset = configuration_get_value_double(&config, "PARAMS", "global_credit_delay", anno, &p->global_credit_delay); + int cn_credit_delay_unset = configuration_get_value_double(&config, "PARAMS", "cn_credit_delay", anno, &p->cn_credit_delay); - if(bf->c1) { - s->in_send_loop = 1; - if(bf->c3) - s->last_buf_full = msg->saved_busy_time; - return; - } - - for(int i = 0; i < msg->num_cll; i++) - { - codes_local_latency_reverse(lp); + int auto_credit_delay_flag; + rc = configuration_get_value_int(&config, "PARAMS", "auto_credit_delay", anno, &auto_credit_delay_flag); + if (rc) { + auto_credit_delay_flag = 0; } - - for(int i = 0; i < msg->num_rngs; i++) - { - tw_rand_reverse_unif(lp->rng); + else { + if(!myRank && auto_credit_delay_flag) + fprintf(stderr, 
"auto_credit_delay flag enabled. All credit delays will be calculated based on their respective bandwidths\n"); } - int vcg = msg->saved_vc; - s->terminal_available_time = msg->saved_available_time; - - s->terminal_length[vcg] += s->params->chunk_size; - /*TODO: MM change this to the vcg */ - s->vc_occupancy[vcg] -= s->params->chunk_size; - terminal_plus_message_list* cur_entry = (terminal_plus_message_list *)rc_stack_pop(s->st); + //If the user specifies a general "credit_delay" AND any of the more specific credit delays, throw an error to make sure they correct their configuration + if (!credit_delay_unset && !(local_credit_delay_unset || global_credit_delay_unset || cn_credit_delay_unset)) + tw_error(TW_LOC, "\nCannot set both a general credit delay and specific (local/global/cn) credit delays. Check configuration file."); - int data_size = s->params->chunk_size; - if(cur_entry->msg.packet_size < s->params->chunk_size) - data_size = cur_entry->msg.packet_size % s->params->chunk_size; - - s->qos_data[vcg] -= data_size; - - prepend_to_terminal_plus_message_list(s->terminal_msgs, - s->terminal_msgs_tail, vcg, cur_entry); - if(bf->c4) { - s->in_send_loop = 1; - } - if (bf->c5) { - s->issueIdle = 1; - if (bf->c6) { - s->busy_time = msg->saved_total_time; - s->last_buf_full = msg->saved_busy_time; - s->busy_time_sample = msg->saved_sample_time; - s->ross_sample.busy_time_sample = msg->saved_sample_time; - s->busy_time_ross_sample = msg->saved_busy_time_ross; + //If the user specifies ANY credit delays general or otherwise AND has the auto credit delay flag enabled, throw an error to make sure they correct the conflicting configuration + if ((!credit_delay_unset || !local_credit_delay_unset || !global_credit_delay_unset || !cn_credit_delay_unset) && auto_credit_delay_flag) + tw_error(TW_LOC, "\nCannot set both a credit delay (general or specific) and also enable auto credit delay calculation. 
Check Configuration file."); + + //If the user doesn't specify either general or specific credit delays - calculate credit delay based on local bandwidth. + //This is old legacy behavior that is left in to make sure that the credit delay configurations of old aren't semantically different + //Other possible way to program this would be to make each credit delay be set based on their respective bandwidths but this semantically + //changes the behavior of old configuration files. (although it would be more accurate) + if (credit_delay_unset && local_credit_delay_unset && global_credit_delay_unset && cn_credit_delay_unset && !auto_credit_delay_flag) { + p->local_credit_delay = bytes_to_ns(p->credit_size, p->local_bandwidth); + p->global_credit_delay = p->local_credit_delay; + p->cn_credit_delay = p->local_credit_delay; + if(!myRank) + fprintf(stderr, "no credit_delay specified - all credit delays set to %.2f\n",p->local_credit_delay); + } + //If the user doesn't specify a general credit delay but leaves any of the specific credit delay values unset, then we need to set those (the above conditional handles if none of them had been set) + else if (credit_delay_unset) { + if (local_credit_delay_unset) { + p->local_credit_delay = bytes_to_ns(p->credit_size, p->local_bandwidth); + if(!myRank && !auto_credit_delay_flag) //if the auto credit delay flag is true then we've already printed what we're going to do + fprintf(stderr, "local_credit_delay not specified, using calculation based on local bandwidth: %.2f\n", p->local_credit_delay); + } + if (global_credit_delay_unset) { + p->global_credit_delay = bytes_to_ns(p->credit_size, p->global_bandwidth); + if(!myRank && !auto_credit_delay_flag) //if the auto credit delay flag is true then we've already printed what we're going to do + fprintf(stderr, "global_credit_delay not specified, using calculation based on global bandwidth: %.2f\n", p->global_credit_delay); + } + if (cn_credit_delay_unset) { + p->cn_credit_delay = 
bytes_to_ns(p->credit_size, p->cn_bandwidth); + if(!myRank && !auto_credit_delay_flag) //if the auto credit delay flag is true then we've already printed what we're going to do + fprintf(stderr, "cn_credit_delay not specified, using calculation based on cn bandwidth: %.2f\n", p->cn_credit_delay); } } - return; -} -/* sends the packet from the current dragonfly compute node to the attached router */ -static void packet_send(terminal_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp) -{ - tw_stime ts; - tw_event *e; - terminal_plus_message *m; - tw_lpid router_id; - - int vcg = 0; - int num_qos_levels = s->params->num_qos_levels; - - msg->last_saved_qos = -1; - msg->qos_reset1 = -1; - msg->qos_reset2 = -1; - msg->num_rngs = 0; - msg->num_cll = 0; - - if(num_qos_levels > 1) - vcg = get_next_vcg(s, bf, msg, lp); - - /* For a terminal to router connection, there would be as many VCGs as number - * of VCs*/ + //If the user specifies a general credit delay (but didn't specify any specific credit delays) then we set all specific credit delays to the general + else if (!credit_delay_unset) { + p->local_credit_delay = general_credit_delay; + p->global_credit_delay = general_credit_delay; + p->cn_credit_delay = general_credit_delay; + + if(!myRank) + fprintf(stderr, "general credit_delay specified - all credit delays set to %.2f\n",general_credit_delay); + } + //END CREDIT DELAY CONFIGURATION LOGIC ---------------- - if(vcg == -1) { - bf->c1 = 1; - s->in_send_loop = 0; - if(!s->last_buf_full) { - bf->c3 = 1; - msg->saved_busy_time = s->last_buf_full; - s->last_buf_full = tw_now(lp); - } - return; + if (PRINT_CONFIG && !myRank) { + dragonfly_plus_print_params(p, stderr); } - + stored_params = p; +} - int data_size = s->params->chunk_size; - msg->saved_vc = vcg; - terminal_plus_message_list* cur_entry = s->terminal_msgs[vcg]; - - uint64_t num_chunks = cur_entry->msg.packet_size / s->params->chunk_size; - if (cur_entry->msg.packet_size < s->params->chunk_size) - 
num_chunks++; +void dragonfly_plus_configure() +{ + anno_map = codes_mapping_get_lp_anno_map(LP_CONFIG_NM_TERM); + assert(anno_map); + num_params = anno_map->num_annos + (anno_map->has_unanno_lp > 0); + all_params = (dragonfly_plus_param *) calloc(num_params, sizeof(*all_params)); - tw_stime delay = s->params->cn_delay; - if ((cur_entry->msg.packet_size < s->params->chunk_size) && (cur_entry->msg.chunk_id == num_chunks - 1)) { - data_size = cur_entry->msg.packet_size % s->params->chunk_size; - delay = bytes_to_ns(cur_entry->msg.packet_size % s->params->chunk_size, s->params->cn_bandwidth); + for (int i = 0; i < anno_map->num_annos; i++) { + const char *anno = anno_map->annotations[i].ptr; + dragonfly_read_config(anno, &all_params[i]); + } + if (anno_map->has_unanno_lp > 0) { + dragonfly_read_config(NULL, &all_params[anno_map->num_annos]); } +#ifdef ENABLE_CORTEX +// model_net_topology = dragonfly_plus_cortex_topology; +#endif +} - s->qos_data[vcg] += data_size; - msg->saved_available_time = s->terminal_available_time; - msg->num_rngs++; - ts = g_tw_lookahead + delay + tw_rand_unif(lp->rng); - s->terminal_available_time = maxd(s->terminal_available_time, tw_now(lp)); - s->terminal_available_time += ts; +/* report dragonfly statistics like average and maximum packet latency, average number of hops traversed */ +void dragonfly_plus_report_stats() +{ + long long avg_hops, total_finished_packets, total_finished_chunks; + long long total_finished_msgs, final_msg_sz; + tw_stime avg_time, max_time; + int total_minimal_packets, total_nonmin_packets; + long total_gen, total_fin; + long total_local_packets_sr, total_local_packets_sg, total_remote_packets; - ts = s->terminal_available_time - tw_now(lp); - // TODO: be annotation-aware - codes_mapping_get_lp_info(lp->gid, lp_group_name, &mapping_grp_id, NULL, &mapping_type_id, NULL, - &mapping_rep_id, &mapping_offset); - codes_mapping_get_lp_id(lp_group_name, LP_CONFIG_NM_ROUT, NULL, 1, s->router_id / num_routers_per_mgrp, - 
s->router_id % num_routers_per_mgrp, &router_id); - // printf("\n Local router id %d global router id %d ", s->router_id, router_id); - // we are sending an event to the router, so no method_event here - void *remote_event; - e = model_net_method_event_new(router_id, ts, lp, DRAGONFLY_PLUS_ROUTER, (void **) &m, &remote_event); - memcpy(m, &cur_entry->msg, sizeof(terminal_plus_message)); - if (m->remote_event_size_bytes) { - memcpy(remote_event, cur_entry->event_data, m->remote_event_size_bytes); + MPI_Reduce(&total_hops, &avg_hops, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce(&N_finished_packets, &total_finished_packets, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce(&N_finished_msgs, &total_finished_msgs, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce(&N_finished_chunks, &total_finished_chunks, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce(&total_msg_sz, &final_msg_sz, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce(&dragonfly_total_time, &avg_time, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce(&dragonfly_max_latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_CODES); + + MPI_Reduce(&packet_gen, &total_gen, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce(&packet_fin, &total_fin, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce( &num_local_packets_sr, &total_local_packets_sr, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce( &num_local_packets_sg, &total_local_packets_sg, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce( &num_remote_packets, &total_remote_packets, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_CODES); + if(isRoutingAdaptive(routing) || SHOW_ADAPTIVE_STATS) { + MPI_Reduce(&minimal_count, &total_minimal_packets, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_CODES); + MPI_Reduce(&nonmin_count, &total_nonmin_packets, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_CODES); } - m->type = R_ARRIVE; - m->src_terminal_id = lp->gid; - m->vc_index = vcg; - m->last_hop = TERMINAL; - m->magic = 
router_magic_num; - m->path_type = -1; - m->local_event_size_bytes = 0; - m->intm_rtr_id = -1; - m->intm_group_id = -1; - m->dfp_upward_channel_flag = 0; - tw_event_send(e); + /* print statistics */ + if (!g_tw_mynode) { + if (PRINT_CONFIG) + dragonfly_plus_print_params(stored_params, NULL); - if (cur_entry->msg.chunk_id == num_chunks - 1 && (cur_entry->msg.local_event_size_bytes > 0)) { - msg->num_cll++; - tw_stime local_ts = codes_local_latency(lp); - tw_event *e_new = tw_event_new(cur_entry->msg.sender_lp, local_ts, lp); - void *m_new = tw_event_data(e_new); - void *local_event = (char *) cur_entry->event_data + cur_entry->msg.remote_event_size_bytes; - memcpy(m_new, local_event, cur_entry->msg.local_event_size_bytes); - tw_event_send(e_new); + printf( + "Average number of router hops traversed: %f; average chunk latency: %lf us; maximum chunk latency: %lf us; " + "avg message size: %lf bytes; finished messages: %lld; finished chunks: %lld \n", + (float) avg_hops / total_finished_chunks, avg_time / (total_finished_chunks * 1000), + max_time / 1000, (float) final_msg_sz / total_finished_msgs, total_finished_msgs, + total_finished_chunks); + if(isRoutingAdaptive(routing) || SHOW_ADAPTIVE_STATS) { + printf("\nADAPTIVE ROUTING STATS: %d chunks routed minimally %d chunks routed non-minimally - completed packets: %lld \n", + total_minimal_packets, total_nonmin_packets, total_finished_chunks); + } + printf("\nTotal packets generated: %ld; finished: %ld; Locally routed: same router: %ld, different-router: %ld; Remote (inter-group): %ld \n", total_gen, total_fin, total_local_packets_sr, total_local_packets_sg, total_remote_packets); } - - // s->packet_counter++; - s->vc_occupancy[vcg] += s->params->chunk_size; - cur_entry = return_head(s->terminal_msgs, s->terminal_msgs_tail, vcg); - rc_stack_push(lp, cur_entry, delete_terminal_plus_message_list, s->st); - s->terminal_length[vcg] -= s->params->chunk_size; + return; +} + +int get_vcg_from_category(terminal_plus_message * 
msg) +{ + if(strcmp(msg->category, "high") == 0) + return Q_HIGH; + else if(strcmp(msg->category, "medium") == 0) + return Q_MEDIUM; + else + tw_error(TW_LOC, "\n priority needs to be specified with qos_levels > 1 %s", msg->category); +} + +static int get_term_bandwidth_consumption(terminal_state * s, int qos_lvl) +{ + assert(qos_lvl >= Q_HIGH && qos_lvl <= Q_LOW); + + /* conversion into bytes/sec from GiB/sec */ + double max_bw = s->params->cn_bandwidth * 1024.0 * 1024.0 * 1024.0; + /* conversion into bytes per one nanosecs */ + double max_bw_per_ns = max_bw / (1000.0 * 1000.0 * 1000.0); + /* derive maximum bytes that can be transferred during the window */ + double max_bytes_per_win = max_bw_per_ns * bw_reset_window; + int percent_bw = (((double)s->qos_data[qos_lvl]) / max_bytes_per_win) * 100; +// printf("\n At terminal %lf max bytes %d percent %d ", max_bytes_per_win, s->qos_data[qos_lvl], percent_bw); + return percent_bw; +} + +static int get_rtr_bandwidth_consumption(router_state * s, int qos_lvl, int output_port) +{ + assert(qos_lvl >= Q_HIGH && qos_lvl <= Q_LOW); + assert(output_port < s->params->intra_grp_radix + s->params->num_global_connections + s->params->num_cn); + + int bandwidth = s->params->cn_bandwidth; + if(output_port < s->params->intra_grp_radix) + bandwidth = s->params->local_bandwidth; + else if(output_port < s->params->intra_grp_radix + s->params->num_global_connections) + bandwidth = s->params->global_bandwidth; + + /* conversion into bytes/sec from GiB/sec */ + double max_bw = bandwidth * 1024.0 * 1024.0 * 1024.0; + /* conversion into bytes per one nanosecs */ + double max_bw_per_ns = max_bw / (1000.0 * 1000.0 * 1000.0); + /* derive maximum bytes that can be transferred during the window */ + double max_bytes_per_win = max_bw_per_ns * bw_reset_window; + + int percent_bw = (((double)s->qos_data[output_port][qos_lvl]) / max_bytes_per_win) * 100; +// printf("\n percent bw consumed by qos_lvl %d is %d bytes transferred %d max_bw %lf ", 
qos_lvl, percent_bw, s->qos_data[output_port][qos_lvl], max_bw_per_ns); + return percent_bw; +} + +void issue_bw_monitor_event_rc(terminal_state * s, tw_bf * bf, terminal_plus_message * msg, tw_lp * lp) +{ + for(int i = 0 ; i < msg->num_cll; i++) + codes_local_latency_reverse(lp); - int next_vcg = 0; + int num_qos_levels = s->params->num_qos_levels; + + if(msg->rc_is_qos_set == 1) + { + for(int i = 0; i < num_qos_levels; i++) + { + s->qos_data[i] = msg->rc_qos_data[i]; + s->qos_status[i] = msg->rc_qos_status[i]; + } + + free(msg->rc_qos_data); + free(msg->rc_qos_status); + msg->rc_is_qos_set = 0; + } + +} +/* resets the bandwidth numbers recorded so far */ +void issue_bw_monitor_event(terminal_state * s, tw_bf * bf, terminal_plus_message * msg, tw_lp * lp) +{ + + msg->num_cll = 0; + msg->num_rngs = 0; + int num_qos_levels = s->params->num_qos_levels; + + //RC data storage start. + //Allocate memory here for these pointers that are stored in the events. FREE THESE IN RC OR IN COMMIT_F + msg->rc_qos_data = (unsigned long long *) calloc(num_qos_levels, sizeof(unsigned long long)); + msg->rc_qos_status = (int *) calloc(num_qos_levels, sizeof(int)); + + //store qos data and status into the arrays. Pointers to the arrays are stored in events. + for(int i = 0; i < num_qos_levels; i++) + { + msg->rc_qos_data[i] = s->qos_data[i]; + msg->rc_qos_status[i] = s->qos_status[i]; + } + msg->rc_is_qos_set = 1; + //RC data storage end. + + /* Reset the qos status and bandwidth consumption. 
*/ + for(int i = 0; i < num_qos_levels; i++) + { + s->qos_status[i] = Q_ACTIVE; + s->qos_data[i] = 0; + } + + if(tw_now(lp) > max_qos_monitor) + return; + + msg->num_cll++; + terminal_plus_message * m; + tw_stime bw_ts = bw_reset_window + codes_local_latency(lp); + tw_event * e = model_net_method_event_new(lp->gid, bw_ts, lp, DRAGONFLY_PLUS, + (void**)&m, NULL); + m->type = T_BANDWIDTH; + m->magic = terminal_magic_num; + tw_event_send(e); +} + +void issue_rtr_bw_monitor_event_rc(router_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp) +{ + int radix = s->params->radix; + int num_qos_levels = s->params->num_qos_levels; + + for(int i = 0 ; i < msg->num_cll; i++) + codes_local_latency_reverse(lp); + + if(msg->rc_is_qos_set == 1) + { + for(int i = 0; i < radix; i++) + { + for(int j = 0; j < num_qos_levels; j++) + { + s->qos_data[i][j] = *(indexer2d(msg->rc_qos_data, i, j, radix, num_qos_levels)); + s->qos_status[i][j] = *(indexer2d(msg->rc_qos_status, i, j, radix, num_qos_levels)); + } + } + + free(msg->rc_qos_data); + free(msg->rc_qos_status); + msg->rc_is_qos_set = 0; + } +} +void issue_rtr_bw_monitor_event(router_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp) +{ + msg->num_cll = 0; + msg->num_rngs = 0; + + int radix = s->params->radix; + int num_qos_levels = s->params->num_qos_levels; + + + //RC data storage start. + //Allocate memory here for these pointers that are stored in the events. FREE THESE IN RC OR IN COMMIT_F + msg->rc_qos_data = (unsigned long long *) calloc(radix * num_qos_levels, sizeof(unsigned long long)); + msg->rc_qos_status = (int *) calloc(radix * num_qos_levels, sizeof(int)); + + //store qos data and status into the arrays. Pointers to the arrays are stored in events. 
+ for(int i = 0; i < radix; i++) + { + for(int j = 0; j < num_qos_levels; j++) + { + *(indexer2d(msg->rc_qos_data, i, j, radix, num_qos_levels)) = s->qos_data[i][j]; + *(indexer2d(msg->rc_qos_status, i, j, radix, num_qos_levels)) = s->qos_status[i][j]; + } + } + msg->rc_is_qos_set = 1; + //RC data storage end. + + + for(int i = 0; i < radix; i++) + { + for(int j = 0; j < num_qos_levels; j++) + { + int bw_consumed = get_rtr_bandwidth_consumption(s, j, i); + + #if DEBUG_QOS == 1 + if(dragonfly_rtr_bw_log != NULL) + { + if(s->qos_data[j][k] > 0) + { + fprintf(dragonfly_rtr_bw_log, "\n %d %f %d %d %d %d %d %f", s->router_id, tw_now(lp), i, j, bw_consumed, s->qos_status[i][j], s->qos_data[i][j], s->busy_time_sample[i]); + } + } + #endif + } + } + + /* Reset the qos status and bandwidth consumption. */ + for(int i = 0; i < s->params->radix; i++) + { + for(int j = 0; j < num_qos_levels; j++) + { + s->qos_status[i][j] = Q_ACTIVE; + s->qos_data[i][j] = 0; + } + s->busy_time_sample[i] = 0; + s->ross_rsample.busy_time[i] = 0; + } + + if(tw_now(lp) > max_qos_monitor) + return; + + msg->num_cll++; + tw_stime bw_ts = bw_reset_window + codes_local_latency(lp); + terminal_plus_message *m; + tw_event * e = model_net_method_event_new(lp->gid, bw_ts, lp, + DRAGONFLY_PLUS_ROUTER, (void**)&m, NULL); + m->type = R_BANDWIDTH; + m->magic = router_magic_num; + tw_event_send(e); +} + +static int get_next_vcg(terminal_state * s, tw_bf * bf, terminal_plus_message * msg, tw_lp * lp) +{ + int num_qos_levels = s->params->num_qos_levels; - if(num_qos_levels > 1) - next_vcg = get_next_vcg(s, bf, msg, lp); + if(num_qos_levels == 1) + { + if(s->terminal_msgs[0] == NULL || ((s->vc_occupancy[0] + s->params->chunk_size) > s->params->cn_vc_size)) + return -1; + else + return 0; + } - cur_entry = NULL; + int bw_consumption[num_qos_levels]; + + /* First make sure the bandwidth consumptions are up to date. 
*/ + for(int k = 0; k < num_qos_levels; k++) + { + if(s->qos_status[k] != Q_OVERBW) + { + bw_consumption[k] = get_term_bandwidth_consumption(s, k); + if(bw_consumption[k] > s->params->qos_bandwidths[k]) + { + if(k == 0) + msg->qos_reset1 = 1; + else if(k == 1) + msg->qos_reset2 = 1; + + s->qos_status[k] = Q_OVERBW; + } + } + } + if(BW_MONITOR == 1) + { + for(int i = 0; i < num_qos_levels; i++) + { + if(s->qos_status[i] == Q_ACTIVE) + { + if(s->terminal_msgs[i] != NULL && ((s->vc_occupancy[i] + s->params->chunk_size) <= s->params->cn_vc_size)) + return i; + } + } + } + + int next_rr_vcg = (s->last_qos_lvl + 1) % num_qos_levels; + /* All vcgs are exceeding their bandwidth limits*/ + for(int i = 0; i < num_qos_levels; i++) + { + if(s->terminal_msgs[i] != NULL && ((s->vc_occupancy[i] + s->params->chunk_size) <= s->params->cn_vc_size)) + { + bf->c2 = 1; + + if(msg->last_saved_qos < 0) + msg->last_saved_qos = s->last_qos_lvl; + + s->last_qos_lvl = next_rr_vcg; + return i; + } + next_rr_vcg = (next_rr_vcg + 1) % num_qos_levels; + } + return -1; +} + +static int get_next_router_vcg(router_state * s, tw_bf * bf, terminal_plus_message * msg, tw_lp * lp) +{ + int num_qos_levels = s->params->num_qos_levels; + + int vcs_per_qos = s->params->num_vcs / num_qos_levels; + int output_port = msg->vc_index; + int vcg = 0; + int base_limit = 0; - if(next_vcg >= 0) - cur_entry = s->terminal_msgs[next_vcg]; + int chunk_size = s->params->chunk_size; + int bw_consumption[num_qos_levels]; + /* First make sure the bandwidth consumptions are up to date. 
*/ + if(BW_MONITOR == 1) + { + for(int k = 0; k < num_qos_levels; k++) + { + if(s->qos_status[output_port][k] != Q_OVERBW) + { + bw_consumption[k] = get_rtr_bandwidth_consumption(s, k, output_port); + if(bw_consumption[k] > s->params->qos_bandwidths[k]) + { +// printf("\n Router %d QoS %d exceeded allowed bandwidth %d ", s->router_id, k, bw_consumption[k]); + if(k == 0) + msg->qos_reset1 = 1; + else if(k == 1) + msg->qos_reset2 = 1; + + s->qos_status[output_port][k] = Q_OVERBW; + } + } + } + int vc_size = s->params->global_vc_size; + if(output_port < s->params->intra_grp_radix) + vc_size = s->params->local_vc_size; + + /* TODO: If none of the vcg is exceeding bandwidth limit then select high + * priority traffic first. */ + for(int i = 0; i < num_qos_levels; i++) + { + if(s->qos_status[output_port][i] == Q_ACTIVE) + { + int base_limit = i * vcs_per_qos; + for(int k = base_limit; k < base_limit + vcs_per_qos; k ++) + { + if(s->pending_msgs[output_port][k] != NULL) + return k; + } + } + } + } + + /* All vcgs are exceeding their bandwidth limits*/ + msg->last_saved_qos = s->last_qos_lvl[output_port]; + int next_rr_vcg = (s->last_qos_lvl[output_port] + 1) % num_qos_levels; - /* if there is another packet inline then schedule another send event */ - if (cur_entry != NULL && s->vc_occupancy[next_vcg] + s->params->chunk_size <= s->params->cn_vc_size) { - terminal_plus_message *m_new; - msg->num_rngs++; - ts += tw_rand_unif(lp->rng); - e = model_net_method_event_new(lp->gid, ts, lp, DRAGONFLY_PLUS, (void **) &m_new, NULL); - m_new->type = T_SEND; - m_new->magic = terminal_magic_num; - tw_event_send(e); - } - else { - /* If not then the LP will wait for another credit or packet generation */ - bf->c4 = 1; - s->in_send_loop = 0; - } - if (s->issueIdle) { - bf->c5 = 1; - s->issueIdle = 0; - msg->num_rngs++; - ts += tw_rand_unif(lp->rng); - model_net_method_idle_event(ts, 0, lp); + for(int i = 0; i < num_qos_levels; i++) + { + base_limit = next_rr_vcg * vcs_per_qos; + for(int 
k = base_limit; k < base_limit + vcs_per_qos; k++) + { + if(s->pending_msgs[output_port][k] != NULL) + { + if(msg->last_saved_qos < 0) + msg->last_saved_qos = s->last_qos_lvl[output_port]; - if (s->last_buf_full > 0.0) { - bf->c6 = 1; - msg->saved_total_time = s->busy_time; - msg->saved_busy_time = s->last_buf_full; - msg->saved_sample_time = s->busy_time_sample; + s->last_qos_lvl[output_port] = next_rr_vcg; + return k; + } + } + next_rr_vcg = (next_rr_vcg + 1) % num_qos_levels; + assert(next_rr_vcg < 2); + } + return -1; +} - s->busy_time += (tw_now(lp) - s->last_buf_full); - s->busy_time_sample += (tw_now(lp) - s->last_buf_full); - s->ross_sample.busy_time_sample += (tw_now(lp) - s->last_buf_full); - s->last_buf_full = 0.0; +void terminal_plus_commit(terminal_state * s, + tw_bf * bf, + terminal_plus_message * msg, + tw_lp * lp) +{ + if(msg->type == T_BANDWIDTH) + { + if(msg->rc_is_qos_set == 1) { + free(msg->rc_qos_data); + free(msg->rc_qos_status); + msg->rc_is_qos_set = 0; } } - return; } -static void packet_arrive_rc(terminal_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp) +void router_plus_commit(router_state * s, + tw_bf * bf, + terminal_plus_message * msg, + tw_lp * lp) { - for(int i = 0; i < msg->num_rngs; i++) - tw_rand_reverse_unif(lp->rng); - - for(int i = 0; i < msg->num_cll; i++) - codes_local_latency_reverse(lp); - - if (bf->c31) { - s->packet_fin--; - packet_fin--; + if(msg->type == R_BANDWIDTH) + { + if(msg->rc_is_qos_set == 1) { + free(msg->rc_qos_data); + free(msg->rc_qos_status); + msg->rc_is_qos_set = 0; + } } - if (msg->path_type == MINIMAL) - minimal_count--; - else - nonmin_count--; - // if (msg->path_type == NON_MINIMAL) - // nonmin_count--; - - N_finished_chunks--; - s->finished_chunks--; - s->fin_chunks_sample--; - s->ross_sample.fin_chunks_sample--; - s->fin_chunks_ross_sample--; +} - total_hops -= msg->my_N_hop; - s->total_hops -= msg->my_N_hop; - s->fin_hops_sample -= msg->my_N_hop; - s->ross_sample.fin_hops_sample -= 
msg->my_N_hop; - s->fin_hops_ross_sample -= msg->my_N_hop; - dragonfly_total_time = msg->saved_total_time; - s->fin_chunks_time = msg->saved_sample_time; - s->ross_sample.fin_chunks_time = msg->saved_sample_time; - s->fin_chunks_time_ross_sample = msg->saved_fin_chunks_ross; - s->total_time = msg->saved_avg_time; +/* initialize a dragonfly compute node terminal */ +void terminal_plus_init(terminal_state *s, tw_lp *lp) +{ + // printf("%d: Terminal Init()\n",lp->gid); + s->packet_gen = 0; + s->packet_fin = 0; + s->total_gen_size = 0; + s->is_monitoring_bw = 0; - struct qhash_head *hash_link = NULL; - struct dfly_qhash_entry *tmp = NULL; + int i; + char anno[MAX_NAME_LENGTH]; - struct dfly_hash_key key; - key.message_id = msg->message_id; - key.sender_id = msg->sender_lp; + // Assign the global router ID + // TODO: be annotation-aware + codes_mapping_get_lp_info(lp->gid, lp_group_name, &mapping_grp_id, NULL, &mapping_type_id, anno, + &mapping_rep_id, &mapping_offset); + if (anno[0] == '\0') { + s->anno = NULL; + s->params = &all_params[num_params - 1]; + } + else { + s->anno = strdup(anno); + int id = configuration_get_annotation_index(anno, anno_map); + s->params = &all_params[id]; + } - hash_link = qhash_search(s->rank_tbl, &key); - tmp = qhash_entry(hash_link, struct dfly_qhash_entry, hash_link); + int num_qos_levels = s->params->num_qos_levels; + int num_lps = codes_mapping_get_lp_count(lp_group_name, 1, LP_CONFIG_NM_TERM, s->anno, 0); + - mn_stats *stat; - stat = model_net_find_stats(msg->category, s->dragonfly_stats_array); - stat->recv_time = msg->saved_rcv_time; + s->terminal_id = codes_mapping_get_lp_relative_id(lp->gid, 0, 0); + s->router_id = dragonfly_plus_get_assigned_router_id(s->terminal_id, s->params); + // s->router_id=(int)s->terminal_id / (s->params->num_cn); //TODO I think this is where the router that + // the terminal is connected to is specified - if (bf->c1) { - stat->recv_count--; - stat->recv_bytes -= msg->packet_size; - N_finished_packets--; 
- s->finished_packets--; - } - if (bf->c3) { - dragonfly_max_latency = msg->saved_available_time; - } + // printf("%d gid is TERMINAL %d with assigned router %d\n",lp->gid,s->terminal_id,s->router_id); + s->terminal_available_time = 0.0; + s->packet_counter = 0; + s->min_latency = INT_MAX; + s->max_latency = 0; - if (bf->c22) { - s->max_latency = msg->saved_available_time; - } - if (bf->c7) { - // assert(!hash_link); - N_finished_msgs--; - s->finished_msgs--; - total_msg_sz -= msg->total_size; - s->total_msg_size -= msg->total_size; - s->data_size_sample -= msg->total_size; - s->ross_sample.data_size_sample -= msg->total_size; - s->data_size_ross_sample -= msg->total_size; + s->finished_msgs = 0; + s->finished_chunks = 0; + s->finished_packets = 0; + s->total_time = 0.0; + s->total_msg_size = 0; - struct dfly_qhash_entry *d_entry_pop = (dfly_qhash_entry *) rc_stack_pop(s->st); - qhash_add(s->rank_tbl, &key, &(d_entry_pop->hash_link)); - s->rank_tbl_pop++; + s->stalled_chunks = 0; + s->busy_time = 0.0; - if (s->rank_tbl_pop >= DFLY_HASH_TABLE_SIZE) - tw_error(TW_LOC, "\n Exceeded allocated qhash size, increase hash size in dragonfly model"); + s->fwd_events = 0; + s->rev_events = 0; - hash_link = &(d_entry_pop->hash_link); - tmp = d_entry_pop; + rc_stack_create(&s->st); + + s->num_vcs = 1; + if(num_qos_levels > 1) + s->num_vcs *= num_qos_levels; + + /* Whether the virtual channel group is active or over-bw*/ + s->qos_status = (int*)calloc(num_qos_levels, sizeof(int)); + + /* How much data has been transmitted on the virtual channel group within + * the window */ + s->qos_data = (unsigned long long*)calloc(num_qos_levels, sizeof(unsigned long long)); + s->vc_occupancy = (int*)calloc(s->num_vcs, sizeof(int)); - if (bf->c4) - model_net_event_rc2(lp, &msg->event_rc); + + for(i = 0; i < num_qos_levels; i++) + { + s->qos_data[i] = 0; + s->qos_status[i] = Q_ACTIVE; } - assert(tmp); - tmp->num_chunks--; - - if (bf->c5) { - qhash_del(hash_link); - free_tmp(tmp); - 
s->rank_tbl_pop--; + for(i = 0; i < s->num_vcs; i++) + { + s->vc_occupancy[i] = 0; } - return; -} -static void send_remote_event(terminal_state *s, terminal_plus_message *msg, tw_lp *lp, tw_bf *bf, - char *event_data, int remote_event_size) -{ - void *tmp_ptr = model_net_method_get_edata(DRAGONFLY_PLUS, msg); - // tw_stime ts = g_tw_lookahead + bytes_to_ns(msg->remote_event_size_bytes, (1/s->params->cn_bandwidth)); - msg->num_rngs++; - tw_stime ts = g_tw_lookahead + mpi_soft_overhead + tw_rand_unif(lp->rng); - if (msg->is_pull) { - bf->c4 = 1; - struct codes_mctx mc_dst = codes_mctx_set_global_direct(msg->sender_mn_lp); - struct codes_mctx mc_src = codes_mctx_set_global_direct(lp->gid); - int net_id = model_net_get_id(LP_METHOD_NM_TERM); - model_net_set_msg_param(MN_MSG_PARAM_START_TIME, MN_MSG_PARAM_START_TIME_VAL, &(msg->msg_start_time)); + s->last_qos_lvl = 0; + s->last_buf_full = 0; - msg->event_rc = model_net_event_mctx(net_id, &mc_src, &mc_dst, msg->category, msg->sender_lp, - msg->pull_size, ts, remote_event_size, tmp_ptr, 0, NULL, lp); - } - else { - tw_event *e = tw_event_new(msg->final_dest_gid, ts, lp); - void *m_remote = tw_event_data(e); - memcpy(m_remote, event_data, remote_event_size); - tw_event_send(e); - } - return; -} -/* packet arrives at the destination terminal */ -static void packet_arrive(terminal_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp) -{ - // NIC aggregation - should this be a separate function? - // Trigger an event on receiving server + s->rank_tbl = NULL; + s->terminal_msgs = + (terminal_plus_message_list **) calloc(s->num_vcs, sizeof(terminal_plus_message_list *)); + s->terminal_msgs_tail = + (terminal_plus_message_list **) calloc(s->num_vcs, sizeof(terminal_plus_message_list *)); - if (msg->my_N_hop > s->params->max_hops_notify) + for(int i = 0; i < s->num_vcs; i++) { - printf("Terminal received a packet with %d hops! 
(Notify on > than %d)\n",msg->my_N_hop, s->params->max_hops_notify); + s->terminal_msgs[i] = NULL; + s->terminal_msgs_tail[i] = NULL; } - - msg->num_rngs = 0; - msg->num_cll = 0; - if (!s->rank_tbl) - s->rank_tbl = qhash_init(dragonfly_rank_hash_compare, dragonfly_hash_func, DFLY_HASH_TABLE_SIZE); + s->terminal_length = (unsigned long long*)calloc(s->num_vcs, sizeof(unsigned long long)); + s->in_send_loop = 0; + s->issueIdle = 0; - struct dfly_hash_key key; - key.message_id = msg->message_id; - key.sender_id = msg->sender_lp; + return; +} - struct qhash_head *hash_link = NULL; - struct dfly_qhash_entry *tmp = NULL; +/* sets up the router virtual channels, global channels, + * local channels, compute node channels */ +void router_plus_init(router_state *r, tw_lp *lp) +{ + // printf("%d: Router Init()\n",lp->gid); - hash_link = qhash_search(s->rank_tbl, &key); + char anno[MAX_NAME_LENGTH]; + codes_mapping_get_lp_info(lp->gid, lp_group_name, &mapping_grp_id, NULL, &mapping_type_id, anno, + &mapping_rep_id, &mapping_offset); - if (hash_link) - tmp = qhash_entry(hash_link, struct dfly_qhash_entry, hash_link); + if (anno[0] == '\0') { + r->anno = NULL; + r->params = &all_params[num_params - 1]; + } + else { + r->anno = strdup(anno); + int id = configuration_get_annotation_index(anno, anno_map); + r->params = &all_params[id]; + } - uint64_t total_chunks = msg->total_size / s->params->chunk_size; + // shorthand + const dragonfly_plus_param *p = r->params; - if (msg->total_size % s->params->chunk_size) - total_chunks++; + num_routers_per_mgrp = + codes_mapping_get_lp_count(lp_group_name, 1, "modelnet_dragonfly_plus_router", NULL, 0); + int num_grp_reps = codes_mapping_get_group_reps(lp_group_name); + if (p->total_routers != num_grp_reps * num_routers_per_mgrp) + tw_error(TW_LOC, + "\n Config error: num_routers specified %d total routers computed in the network %d " + "does not match with repetitions * dragonfly_router %d ", + p->num_routers, p->total_routers, num_grp_reps * 
num_routers_per_mgrp); - if (!total_chunks) - total_chunks = 1; + r->router_id = codes_mapping_get_lp_relative_id(lp->gid, 0, 0); + r->group_id = r->router_id / p->num_routers; - /*if(tmp) - { - if(tmp->num_chunks >= total_chunks || tmp->num_chunks < 0) - { - //tw_output(lp, "\n invalid number of chunks %d for LP %ld ", tmp->num_chunks, lp->gid); - tw_lp_suspend(lp, 0, 0); - return; - } - }*/ - assert(lp->gid == msg->dest_terminal_id); + // printf("\n Local router id %d global id %d ", r->router_id, lp->gid); - if (msg->packet_ID == LLU(TRACK_PKT) && msg->src_terminal_id == T_ID) - printf("\n Packet %llu arrived at lp %llu hops %d ", msg->packet_ID, LLU(lp->gid), msg->my_N_hop); + r->num_rtr_rc_windows = 100; + r->is_monitoring_bw = 0; + r->fwd_events = 0; + r->rev_events = 0; - msg->num_rngs++; - tw_stime ts = g_tw_lookahead + s->params->credit_delay + tw_rand_unif(lp->rng); + // QoS related variables + int num_qos_levels = p->num_qos_levels; - // no method_event here - message going to router - tw_event *buf_e; - terminal_plus_message *buf_msg; - buf_e = - model_net_method_event_new(msg->intm_lp_id, ts, lp, DRAGONFLY_PLUS_ROUTER, (void **) &buf_msg, NULL); - buf_msg->magic = router_magic_num; - buf_msg->vc_index = msg->vc_index; - buf_msg->output_chan = msg->output_chan; - buf_msg->type = R_BUFFER; - tw_event_send(buf_e); + // Determine if router is a spine or a leaf + int intra_group_id = r->router_id % p->num_routers; + if (intra_group_id >= (p->num_routers / 2)) { //TODO this assumes symmetric spine and leafs + r->dfp_router_type = SPINE; + assert(router_type_map[r->router_id] == SPINE); + // printf("%lu: %i is a SPINE\n",lp->gid, r->router_id); + } + else { + r->dfp_router_type = LEAF; + assert(router_type_map[r->router_id] == LEAF); + // printf("%lu: %i is a LEAF\n",lp->gid, r->router_id); + } +#if DEBUG_QOS == 1 + char rtr_bw_log[128]; + sprintf(rtr_bw_log, "router-bw-tracker-%d", g_tw_mynode); + + if(dragonfly_rtr_bw_log == NULL) + { + dragonfly_rtr_bw_log 
= fopen(rtr_bw_log, "w+"); + + fprintf(dragonfly_rtr_bw_log, "\n router-id time-stamp port-id qos-level bw-consumed qos-status qos-data busy-time"); + } +#endif + r->connMan = &connManagerList[r->router_id]; - bf->c1 = 0; - bf->c3 = 0; - bf->c4 = 0; - bf->c7 = 0; + r->gc_usage = (int *) calloc(p->num_global_connections, sizeof(int)); - /* Total overall finished chunks in simulation */ - N_finished_chunks++; - /* Finished chunks on a LP basis */ - s->finished_chunks++; - /* Finished chunks per sample */ - s->fin_chunks_sample++; - s->ross_sample.fin_chunks_sample++; - s->fin_chunks_ross_sample++; + r->global_channel = (int *) calloc(p->num_global_connections, sizeof(int)); + r->next_output_available_time = (tw_stime *) calloc(p->radix, sizeof(tw_stime)); + r->link_traffic = (int64_t *) calloc(p->radix, sizeof(int64_t)); + r->link_traffic_sample = (int64_t *) calloc(p->radix, sizeof(int64_t)); - /* WE do not allow self messages through dragonfly */ - assert(lp->gid != msg->src_terminal_id); + r->stalled_chunks = (unsigned long*)calloc(p->radix, sizeof(unsigned long)); - // Verify that the router that send the packet to this terminal is the router assigned to this terminal - int dest_router_id = dragonfly_plus_get_assigned_router_id(s->terminal_id, s->params); - int received_from_rel_id = codes_mapping_get_lp_relative_id(msg->intm_lp_id,0,0); - assert(dest_router_id == received_from_rel_id); + r->vc_occupancy = (int **) calloc(p->radix, sizeof(int *)); + r->qos_data = (unsigned long long**)calloc(p->radix, sizeof(unsigned long long*)); + r->last_qos_lvl = (int*)calloc(p->radix, sizeof(int)); + r->qos_status = (int**)calloc(p->radix, sizeof(int*)); + r->in_send_loop = (int *) calloc(p->radix, sizeof(int)); + r->pending_msgs = + (terminal_plus_message_list ***) calloc(p->radix, sizeof(terminal_plus_message_list **)); + r->pending_msgs_tail = + (terminal_plus_message_list ***) calloc(p->radix, sizeof(terminal_plus_message_list **)); + r->queued_msgs = + 
(terminal_plus_message_list ***) calloc(p->radix, sizeof(terminal_plus_message_list **)); + r->queued_msgs_tail = + (terminal_plus_message_list ***) calloc(p->radix, sizeof(terminal_plus_message_list **)); + r->queued_count = (int *) calloc(p->radix, sizeof(int)); + r->last_buf_full = (tw_stime*) calloc(p->radix, sizeof(tw_stime *)); + r->busy_time = (tw_stime *) calloc(p->radix, sizeof(tw_stime)); + r->busy_time_sample = (tw_stime *) calloc(p->radix, sizeof(tw_stime)); - uint64_t num_chunks = msg->packet_size / s->params->chunk_size; - if (msg->packet_size < s->params->chunk_size) - num_chunks++; + /* set up for ROSS stats sampling */ + r->link_traffic_ross_sample = (int64_t*)calloc(p->radix, sizeof(int64_t)); + r->busy_time_ross_sample = (tw_stime*)calloc(p->radix, sizeof(tw_stime)); + if (g_st_model_stats) + lp->model_types->mstat_sz = sizeof(tw_lpid) + (sizeof(int64_t) + sizeof(tw_stime)) * p->radix; + if (g_st_use_analysis_lps && g_st_model_stats) + lp->model_types->sample_struct_sz = sizeof(struct dfly_router_sample) + (sizeof(tw_stime) + sizeof(int64_t)) * p->radix; + r->ross_rsample.busy_time = (tw_stime*)calloc(p->radix, sizeof(tw_stime)); + r->ross_rsample.link_traffic_sample = (int64_t*)calloc(p->radix, sizeof(int64_t)); - if (msg->path_type == MINIMAL) - minimal_count++; - else - nonmin_count++; + rc_stack_create(&r->st); - if (msg->chunk_id == num_chunks - 1) { - bf->c31 = 1; - s->packet_fin++; - packet_fin++; + for (int i = 0; i < p->radix; i++) { + // Set credit & router occupancy + r->last_buf_full[i] = 0.0; + r->busy_time[i] = 0.0; + r->busy_time_sample[i] = 0.0; + r->next_output_available_time[i] = 0; + r->last_qos_lvl[i] = 0; + r->link_traffic[i] = 0; + r->link_traffic_sample[i] = 0; + r->queued_count[i] = 0; + r->in_send_loop[i] = 0; + r->vc_occupancy[i] = (int *) calloc(p->num_vcs, sizeof(int)); + r->pending_msgs[i] = + (terminal_plus_message_list **) calloc(p->num_vcs, sizeof(terminal_plus_message_list *)); + r->pending_msgs_tail[i] = + 
(terminal_plus_message_list **) calloc(p->num_vcs, sizeof(terminal_plus_message_list *)); + r->queued_msgs[i] = + (terminal_plus_message_list **) calloc(p->num_vcs, sizeof(terminal_plus_message_list *)); + r->queued_msgs_tail[i] = + (terminal_plus_message_list **) calloc(p->num_vcs, sizeof(terminal_plus_message_list *)); + + r->qos_status[i] = (int*)calloc(num_qos_levels, sizeof(int)); + r->qos_data[i] = (unsigned long long*)calloc(num_qos_levels, sizeof(unsigned long long)); + + for(int j = 0; j < num_qos_levels; j++) + { + r->qos_status[i][j] = Q_ACTIVE; + r->qos_data[i][j] = 0; + } + for (int j = 0; j < p->num_vcs; j++) { + r->vc_occupancy[i][j] = 0; + r->pending_msgs[i][j] = NULL; + r->pending_msgs_tail[i][j] = NULL; + r->queued_msgs[i][j] = NULL; + r->queued_msgs_tail[i][j] = NULL; + } } - // if (msg->path_type != MINIMAL) - // printf("\n Wrong message path type %d ", msg->path_type); - - /* save the sample time */ - msg->saved_sample_time = s->fin_chunks_time; - s->fin_chunks_time += (tw_now(lp) - msg->travel_start_time); - s->ross_sample.fin_chunks_time += (tw_now(lp) - msg->travel_start_time); - msg->saved_fin_chunks_ross = s->fin_chunks_time_ross_sample; - s->fin_chunks_time_ross_sample += (tw_now(lp) - msg->travel_start_time); - /* save the total time per LP */ - msg->saved_avg_time = s->total_time; - s->total_time += (tw_now(lp) - msg->travel_start_time); + r->connMan->solidify_connections(); - msg->saved_total_time = dragonfly_total_time; - dragonfly_total_time += tw_now(lp) - msg->travel_start_time; - total_hops += msg->my_N_hop; - s->total_hops += msg->my_N_hop; - s->fin_hops_sample += msg->my_N_hop; - s->ross_sample.fin_hops_sample += msg->my_N_hop; - s->fin_hops_ross_sample += msg->my_N_hop; + return; +} - mn_stats *stat = model_net_find_stats(msg->category, s->dragonfly_stats_array); - msg->saved_rcv_time = stat->recv_time; - stat->recv_time += (tw_now(lp) - msg->travel_start_time); +/* dragonfly packet event reverse handler */ +static void 
dragonfly_plus_packet_event_rc(tw_lp *sender) +{ + codes_local_latency_reverse(sender); + return; +} -#if DEBUG == 1 - if (msg->packet_ID == TRACK && msg->chunk_id == num_chunks - 1 && msg->message_id == TRACK_MSG) { - printf("(%lf) [Terminal %d] packet %lld has arrived \n", tw_now(lp), (int) lp->gid, msg->packet_ID); +/* MM: These packet events (packet_send, packet_receive etc.) will be used as is, basically, the routing + * functions will be changed only. */ +/* dragonfly packet event , generates a dragonfly packet on the compute node */ +static tw_stime dragonfly_plus_packet_event(model_net_request const *req, + uint64_t message_offset, + uint64_t packet_size, + tw_stime offset, + mn_sched_params const *sched_params, + void const *remote_event, + void const *self_event, + tw_lp *sender, + int is_last_pckt) +{ + (void) message_offset; + (void) sched_params; + tw_event *e_new; + tw_stime xfer_to_nic_time; + terminal_plus_message *msg; + char *tmp_ptr; - printf("travel start time is %f\n", msg->travel_start_time); + xfer_to_nic_time = codes_local_latency(sender); + // e_new = tw_event_new(sender->gid, xfer_to_nic_time+offset, sender); + // msg = tw_event_data(e_new); + e_new = model_net_method_event_new(sender->gid, xfer_to_nic_time + offset, sender, DRAGONFLY_PLUS, + (void **) &msg, (void **) &tmp_ptr); + strcpy(msg->category, req->category); + msg->final_dest_gid = req->final_dest_lp; + msg->total_size = req->msg_size; + msg->sender_lp = req->src_lp; + msg->sender_mn_lp = sender->gid; + msg->packet_size = packet_size; + msg->travel_start_time = tw_now(sender); + msg->remote_event_size_bytes = 0; + msg->local_event_size_bytes = 0; + msg->type = T_GENERATE; + msg->dest_terminal_id = req->dest_mn_lp; + msg->dfp_dest_terminal_id = codes_mapping_get_lp_relative_id(msg->dest_terminal_id,0,0); + msg->message_id = req->msg_id; + msg->is_pull = req->is_pull; + msg->pull_size = req->pull_size; + msg->magic = terminal_magic_num; + msg->msg_start_time = req->msg_start_time; 
- printf("My hop now is %d\n", msg->my_N_hop); + if (is_last_pckt) /* Its the last packet so pass in remote and local event information*/ + { + if (req->remote_event_size > 0) { + msg->remote_event_size_bytes = req->remote_event_size; + memcpy(tmp_ptr, remote_event, req->remote_event_size); + tmp_ptr += req->remote_event_size; + } + if (req->self_event_size > 0) { + msg->local_event_size_bytes = req->self_event_size; + memcpy(tmp_ptr, self_event, req->self_event_size); + tmp_ptr += req->self_event_size; + } } -#endif - - /* Now retreieve the number of chunks completed from the hash and update - * them */ - void *m_data_src = model_net_method_get_edata(DRAGONFLY_PLUS, msg); + // printf("\n dragonfly remote event %d local event %d last packet %d %lf ", msg->remote_event_size_bytes, + // msg->local_event_size_bytes, is_last_pckt, xfer_to_nic_time); + tw_event_send(e_new); + return xfer_to_nic_time; +} - /* If an entry does not exist then create one */ - if (!tmp) { - bf->c5 = 1; - struct dfly_qhash_entry *d_entry = (dfly_qhash_entry *) calloc(1, sizeof(struct dfly_qhash_entry)); - d_entry->num_chunks = 0; - d_entry->key = key; - d_entry->remote_event_data = NULL; - d_entry->remote_event_size = 0; - qhash_add(s->rank_tbl, &key, &(d_entry->hash_link)); - s->rank_tbl_pop++; +static void packet_generate_rc(terminal_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp) +{ + s->total_gen_size -= msg->packet_size; + s->packet_gen--; + packet_gen--; + s->packet_counter--; - if (s->rank_tbl_pop >= DFLY_HASH_TABLE_SIZE) - tw_error(TW_LOC, "\n Exceeded allocated qhash size, increase hash size in dragonfly model"); + if(bf->c2) + num_local_packets_sr--; + if(bf->c3) + num_local_packets_sg--; + if(bf->c4) + num_remote_packets--; + + for(int i = 0; i < msg->num_rngs; i++) + tw_rand_reverse_unif(lp->rng); - hash_link = &(d_entry->hash_link); - tmp = d_entry; - } + for(int i = 0; i < msg->num_cll; i++) + codes_local_latency_reverse(lp); - assert(tmp); - tmp->num_chunks++; + int 
num_qos_levels = s->params->num_qos_levels; + if(bf->c1) + s->is_monitoring_bw = 0; - if (msg->chunk_id == num_chunks - 1) { - bf->c1 = 1; - stat->recv_count++; - stat->recv_bytes += msg->packet_size; + int num_chunks = msg->packet_size / s->params->chunk_size; + if (msg->packet_size < s->params->chunk_size) + num_chunks++; - N_finished_packets++; - s->finished_packets++; - } - /* if its the last chunk of the packet then handle the remote event data */ - if (msg->remote_event_size_bytes > 0 && !tmp->remote_event_data) { - /* Retreive the remote event entry */ - tmp->remote_event_data = (char *) calloc(1, msg->remote_event_size_bytes); - assert(tmp->remote_event_data); - tmp->remote_event_size = msg->remote_event_size_bytes; - memcpy(tmp->remote_event_data, m_data_src, msg->remote_event_size_bytes); - } - if (s->min_latency > tw_now(lp) - msg->travel_start_time) { - s->min_latency = tw_now(lp) - msg->travel_start_time; + + int vcg = 0; + if(num_qos_levels > 1) + { + vcg = get_vcg_from_category(msg); + assert(vcg == Q_HIGH || vcg == Q_MEDIUM); } - if (dragonfly_max_latency < tw_now(lp) - msg->travel_start_time) { - bf->c3 = 1; - msg->saved_available_time = dragonfly_max_latency; - dragonfly_max_latency = tw_now(lp) - msg->travel_start_time; - s->max_latency = tw_now(lp) - msg->travel_start_time; + assert(vcg < num_qos_levels); + + int i; + for (i = 0; i < num_chunks; i++) { + delete_terminal_plus_message_list(return_tail(s->terminal_msgs, s->terminal_msgs_tail, vcg)); + s->terminal_length[vcg] -= s->params->chunk_size; } - if (s->max_latency < tw_now(lp) - msg->travel_start_time) { - bf->c22 = 1; - msg->saved_available_time = s->max_latency; - s->max_latency = tw_now(lp) - msg->travel_start_time; + if (bf->c5) { + s->in_send_loop = 0; } - /* If all chunks of a message have arrived then send a remote event to the - * callee*/ - // assert(tmp->num_chunks <= total_chunks); - - if (tmp->num_chunks >= total_chunks) { - bf->c7 = 1; - - s->data_size_sample += 
msg->total_size; - s->ross_sample.data_size_sample += msg->total_size; - s->data_size_ross_sample += msg->total_size; - N_finished_msgs++; - total_msg_sz += msg->total_size; - s->total_msg_size += msg->total_size; - s->finished_msgs++; - - // assert(tmp->remote_event_data && tmp->remote_event_size > 0); - if (tmp->remote_event_data && tmp->remote_event_size > 0) { - bf->c8 = 1; - send_remote_event(s, msg, lp, bf, tmp->remote_event_data, tmp->remote_event_size); + if (bf->c11) { + s->issueIdle = 0; + s->stalled_chunks--; + if(bf->c8) { + s->last_buf_full = msg->saved_busy_time; } - /* Remove the hash entry */ - qhash_del(hash_link); - rc_stack_push(lp, tmp, free_tmp, s->st); - s->rank_tbl_pop--; } - return; + struct mn_stats *stat; + stat = model_net_find_stats(msg->category, s->dragonfly_stats_array); + stat->send_count--; + stat->send_bytes -= msg->packet_size; + stat->send_time -= (1 / s->params->cn_bandwidth) * msg->packet_size; } -void dragonfly_plus_rsample_init(router_state *s, tw_lp *lp) +/* generates packet at the current dragonfly compute node */ +static void packet_generate(terminal_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp) { - (void) lp; - int i = 0; - const dragonfly_plus_param *p = s->params; + msg->num_rngs = 0; + msg->num_cll = 0; + + packet_gen++; + s->packet_gen++; + s->total_gen_size += msg->packet_size; + + int num_qos_levels = s->params->num_qos_levels; + int vcg = 0; //by default there's only one VC for terminals, VC0. 
There can be more based on the number of QoS levels - assert(p->radix); + if (num_qos_levels > 1) { + tw_lpid router_id; + codes_mapping_get_lp_info(lp->gid, lp_group_name, &mapping_grp_id, NULL, &mapping_type_id, NULL, &mapping_rep_id, &mapping_offset); + codes_mapping_get_lp_id(lp_group_name, LP_CONFIG_NM_ROUT, NULL, 0, s->router_id / num_routers_per_mgrp, s->router_id % num_routers_per_mgrp, &router_id); - s->max_arr_size = MAX_STATS; - s->rsamples = (struct dfly_router_sample *) calloc(MAX_STATS, sizeof(struct dfly_router_sample)); - for (; i < s->max_arr_size; i++) { - s->rsamples[i].busy_time = (tw_stime *) calloc(p->radix, sizeof(tw_stime)); - s->rsamples[i].link_traffic_sample = (int64_t *) calloc(p->radix, sizeof(int64_t)); + if (s->is_monitoring_bw == 0) { + bf->c1 = 1; + /* Issue an event on both terminal and router to monitor bandwidth */ + msg->num_cll++; + tw_stime bw_ts = bw_reset_window + codes_local_latency(lp); + terminal_plus_message * m; + tw_event * e = model_net_method_event_new(lp->gid, bw_ts, lp, DRAGONFLY_PLUS, (void**)&m, NULL); + m->type = T_BANDWIDTH; + m->magic = terminal_magic_num; + s->is_monitoring_bw = 1; + tw_event_send(e); + } + + vcg = get_vcg_from_category(msg); + assert(vcg == Q_HIGH || vcg == Q_MEDIUM); } -} -void dragonfly_plus_rsample_rc_fn(router_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp) -{ - (void) bf; - (void) lp; - (void) msg; + assert(vcg < num_qos_levels); - s->op_arr_size--; - int cur_indx = s->op_arr_size; - struct dfly_router_sample stat = s->rsamples[cur_indx]; + tw_stime ts, nic_ts; + assert(lp->gid != msg->dest_terminal_id); const dragonfly_plus_param *p = s->params; - int i = 0; - for (; i < p->radix; i++) { - s->busy_time_sample[i] = stat.busy_time[i]; - s->link_traffic_sample[i] = stat.link_traffic_sample[i]; - } + int total_event_size; + uint64_t num_chunks = msg->packet_size / p->chunk_size; + double cn_delay = s->params->cn_delay; - for (i = 0; i < p->radix; i++) { - stat.busy_time[i] = 0; 
- stat.link_traffic_sample[i] = 0; + int dest_router_id = dragonfly_plus_get_assigned_router_id(msg->dfp_dest_terminal_id, s->params); + int dest_grp_id = dest_router_id / s->params->num_routers; + int src_grp_id = s->router_id / s->params->num_routers; + + if(src_grp_id == dest_grp_id) + { + if(dest_router_id == s->router_id) + { + bf->c2 = 1; + num_local_packets_sr++; + } + else + { + bf->c3 = 1; + num_local_packets_sg++; + } + } + else + { + bf->c4 = 1; + num_remote_packets++; } - s->fwd_events = stat.fwd_events; - s->rev_events = stat.rev_events; -} -void dragonfly_plus_rsample_fn(router_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp) -{ - (void) bf; - (void) lp; - (void) msg; + if (msg->packet_size < s->params->chunk_size) + num_chunks++; + + if (msg->packet_size < s->params->chunk_size) + cn_delay = bytes_to_ns(msg->packet_size % s->params->chunk_size, s->params->cn_bandwidth); + + msg->num_rngs++; + nic_ts = g_tw_lookahead + (num_chunks * cn_delay) + tw_rand_unif(lp->rng); + + // msg->packet_ID = lp->gid + g_tw_nlp * s->packet_counter; + msg->packet_ID = s->packet_counter; + s->packet_counter++; + msg->my_N_hop = 0; + msg->my_l_hop = 0; + msg->my_g_hop = 0; - const dragonfly_plus_param *p = s->params; + // if(msg->dest_terminal_id == TRACK) + if (msg->packet_ID == LLU(TRACK_PKT) && lp->gid == T_ID) + printf("\n Packet %llu generated at terminal %d dest %llu size %llu num chunks %llu ", msg->packet_ID, + s->terminal_id, LLU(msg->dest_terminal_id), LLU(msg->packet_size), LLU(num_chunks)); - if (s->op_arr_size >= s->max_arr_size) { - struct dfly_router_sample *tmp = - (dfly_router_sample *) calloc((MAX_STATS + s->max_arr_size), sizeof(struct dfly_router_sample)); - memcpy(tmp, s->rsamples, s->op_arr_size * sizeof(struct dfly_router_sample)); - free(s->rsamples); - s->rsamples = tmp; - s->max_arr_size += MAX_STATS; - } + for (int i = 0; i < num_chunks; i++) { + terminal_plus_message_list *cur_chunk = + (terminal_plus_message_list *) calloc(1, 
sizeof(terminal_plus_message_list)); + msg->origin_router_id = s->router_id; + init_terminal_plus_message_list(cur_chunk, msg); - int i = 0; - int cur_indx = s->op_arr_size; + if (msg->remote_event_size_bytes + msg->local_event_size_bytes > 0) { + cur_chunk->event_data = + (char *) calloc(1, msg->remote_event_size_bytes + msg->local_event_size_bytes); + } - s->rsamples[cur_indx].router_id = s->router_id; - s->rsamples[cur_indx].end_time = tw_now(lp); - s->rsamples[cur_indx].fwd_events = s->fwd_events; - s->rsamples[cur_indx].rev_events = s->rev_events; + void *m_data_src = model_net_method_get_edata(DRAGONFLY_PLUS, msg); + if (msg->remote_event_size_bytes) { + memcpy(cur_chunk->event_data, m_data_src, msg->remote_event_size_bytes); + } + if (msg->local_event_size_bytes) { + m_data_src = (char *) m_data_src + msg->remote_event_size_bytes; + memcpy((char *) cur_chunk->event_data + msg->remote_event_size_bytes, m_data_src, + msg->local_event_size_bytes); + } - for (; i < p->radix; i++) { - s->rsamples[cur_indx].busy_time[i] = s->busy_time_sample[i]; - s->rsamples[cur_indx].link_traffic_sample[i] = s->link_traffic_sample[i]; + cur_chunk->msg.output_chan = vcg; //By default is 0 but QoS can mean more than just a single VC for terminals + cur_chunk->msg.chunk_id = i; + cur_chunk->msg.origin_router_id = s->router_id; + append_to_terminal_plus_message_list(s->terminal_msgs, s->terminal_msgs_tail, vcg, cur_chunk); + s->terminal_length[vcg] += s->params->chunk_size; } - s->op_arr_size++; + if (s->terminal_length[vcg] < s->params->cn_vc_size) { + model_net_method_idle_event(nic_ts, 0, lp); + } + else { + bf->c11 = 1; + s->issueIdle = 1; + s->stalled_chunks++; - /* clear up the current router stats */ - s->fwd_events = 0; - s->rev_events = 0; + //this block was missing from when QOS was added - readded 10-31-19 + if(s->last_buf_full == 0.0) + { + bf->c8 = 1; + msg->saved_busy_time = s->last_buf_full; + /* TODO: Assumes a single vc from terminal to router */ + s->last_buf_full 
= tw_now(lp); + } + } - for (i = 0; i < p->radix; i++) { - s->busy_time_sample[i] = 0; - s->link_traffic_sample[i] = 0; + if (s->in_send_loop == 0) { + bf->c5 = 1; + msg->num_cll++; + ts = codes_local_latency(lp); + terminal_plus_message *m; + tw_event *e = model_net_method_event_new(lp->gid, ts, lp, DRAGONFLY_PLUS, (void **) &m, NULL); + m->type = T_SEND; + m->magic = terminal_magic_num; + s->in_send_loop = 1; + tw_event_send(e); } + + total_event_size = + model_net_get_msg_sz(DRAGONFLY_PLUS) + msg->remote_event_size_bytes + msg->local_event_size_bytes; + mn_stats *stat; + stat = model_net_find_stats(msg->category, s->dragonfly_stats_array); + stat->send_count++; + stat->send_bytes += msg->packet_size; + stat->send_time += (1 / p->cn_bandwidth) * msg->packet_size; + if (stat->max_event_size < total_event_size) + stat->max_event_size = total_event_size; + + return; } -void dragonfly_plus_rsample_fin(router_state *s, tw_lp *lp) +static void packet_send_rc(terminal_state * s, tw_bf * bf, terminal_plus_message * msg, tw_lp * lp) { - (void) lp; - const dragonfly_plus_param *p = s->params; + int num_qos_levels = s->params->num_qos_levels; - if (s->router_id == 0) { - /* write metadata file */ - char meta_fname[64]; - sprintf(meta_fname, "dragonfly-router-sampling.meta"); + if(msg->qos_reset1) + s->qos_status[0] = Q_ACTIVE; + if(msg->qos_reset2) + s->qos_status[1] = Q_ACTIVE; + + if(msg->last_saved_qos) + s->last_qos_lvl = msg->last_saved_qos; - FILE *fp = fopen(meta_fname, "w"); - fprintf(fp, - "Router sample struct format: \nrouter_id (tw_lpid) \nbusy time for each of the %d links " - "(double) \n" - "link traffic for each of the %d links (int64_t) \nsample end time (double) forward events " - "per sample \nreverse events per sample ", - p->radix, p->radix); - fclose(fp); + if(bf->c1) { + s->in_send_loop = 1; + if(bf->c3) + s->last_buf_full = msg->saved_busy_time; + return; } - char rt_fn[MAX_NAME_LENGTH]; - if (strcmp(router_sample_file, "") == 0) - sprintf(rt_fn, 
"dragonfly-router-sampling-%ld.bin", g_tw_mynode); - else - sprintf(rt_fn, "%s-%ld.bin", router_sample_file, g_tw_mynode); - - int i = 0; - - int size_sample = sizeof(tw_lpid) + p->radix * (sizeof(int64_t) + sizeof(tw_stime)) + - sizeof(tw_stime) + 2 * sizeof(long); - FILE *fp = fopen(rt_fn, "a"); - fseek(fp, sample_rtr_bytes_written, SEEK_SET); - - for (; i < s->op_arr_size; i++) { - fwrite((void *) &(s->rsamples[i].router_id), sizeof(tw_lpid), 1, fp); - fwrite(s->rsamples[i].busy_time, sizeof(tw_stime), p->radix, fp); - fwrite(s->rsamples[i].link_traffic_sample, sizeof(int64_t), p->radix, fp); - fwrite((void *) &(s->rsamples[i].end_time), sizeof(tw_stime), 1, fp); - fwrite((void *) &(s->rsamples[i].fwd_events), sizeof(long), 1, fp); - fwrite((void *) &(s->rsamples[i].rev_events), sizeof(long), 1, fp); + + for(int i = 0; i < msg->num_cll; i++) + { + codes_local_latency_reverse(lp); } - sample_rtr_bytes_written += (s->op_arr_size * size_sample); - fclose(fp); -} -void dragonfly_plus_sample_init(terminal_state *s, tw_lp *lp) -{ - (void) lp; - s->fin_chunks_sample = 0; - s->data_size_sample = 0; - s->fin_hops_sample = 0; - s->fin_chunks_time = 0; - s->busy_time_sample = 0; - - s->op_arr_size = 0; - s->max_arr_size = MAX_STATS; + + for(int i = 0; i < msg->num_rngs; i++) + { + tw_rand_reverse_unif(lp->rng); + } + int vcg = msg->saved_vc; + s->terminal_available_time = msg->saved_available_time; + + s->terminal_length[vcg] += s->params->chunk_size; + /*TODO: MM change this to the vcg */ + s->vc_occupancy[vcg] -= s->params->chunk_size; - s->sample_stat = (dfly_cn_sample *) calloc(MAX_STATS, sizeof(struct dfly_cn_sample)); -} -void dragonfly_plus_sample_rc_fn(terminal_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp) -{ - (void) lp; - (void) bf; - (void) msg; + terminal_plus_message_list* cur_entry = (terminal_plus_message_list *)rc_stack_pop(s->st); + + int data_size = s->params->chunk_size; + if(cur_entry->msg.packet_size < s->params->chunk_size) + data_size = 
cur_entry->msg.packet_size % s->params->chunk_size; - s->op_arr_size--; - int cur_indx = s->op_arr_size; - struct dfly_cn_sample stat = s->sample_stat[cur_indx]; - s->busy_time_sample = stat.busy_time_sample; - s->fin_chunks_time = stat.fin_chunks_time; - s->fin_hops_sample = stat.fin_hops_sample; - s->data_size_sample = stat.data_size_sample; - s->fin_chunks_sample = stat.fin_chunks_sample; - s->fwd_events = stat.fwd_events; - s->rev_events = stat.rev_events; + s->qos_data[vcg] -= data_size; - stat.busy_time_sample = 0; - stat.fin_chunks_time = 0; - stat.fin_hops_sample = 0; - stat.data_size_sample = 0; - stat.fin_chunks_sample = 0; - stat.end_time = 0; - stat.terminal_id = 0; - stat.fwd_events = 0; - stat.rev_events = 0; + prepend_to_terminal_plus_message_list(s->terminal_msgs, + s->terminal_msgs_tail, vcg, cur_entry); + if(bf->c4) { + s->in_send_loop = 1; + } + if (bf->c5) { + s->issueIdle = 1; + if (bf->c6) { + s->busy_time = msg->saved_total_time; + s->last_buf_full = msg->saved_busy_time; + s->busy_time_sample = msg->saved_sample_time; + s->ross_sample.busy_time_sample = msg->saved_sample_time; + s->busy_time_ross_sample = msg->saved_busy_time_ross; + } + } + return; } - -void dragonfly_plus_sample_fn(terminal_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp) +/* sends the packet from the current dragonfly compute node to the attached router */ +static void packet_send(terminal_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp) { - (void) lp; - (void) msg; - (void) bf; + tw_stime ts; + tw_event *e; + terminal_plus_message *m; + tw_lpid router_id; + + int vcg = 0; + int num_qos_levels = s->params->num_qos_levels; + + msg->last_saved_qos = -1; + msg->qos_reset1 = -1; + msg->qos_reset2 = -1; + msg->num_rngs = 0; + msg->num_cll = 0; + + vcg = get_next_vcg(s, bf, msg, lp); + + /* For a terminal to router connection, there would be as many VCGs as number + * of VCs*/ - if (s->op_arr_size >= s->max_arr_size) { - /* In the worst case, copy array to 
a new memory location, its very - * expensive operation though */ - struct dfly_cn_sample *tmp = - (dfly_cn_sample *) calloc((MAX_STATS + s->max_arr_size), sizeof(struct dfly_cn_sample)); - memcpy(tmp, s->sample_stat, s->op_arr_size * sizeof(struct dfly_cn_sample)); - free(s->sample_stat); - s->sample_stat = tmp; - s->max_arr_size += MAX_STATS; + if(vcg == -1) { + bf->c1 = 1; + s->in_send_loop = 0; + if(!s->last_buf_full) { + bf->c3 = 1; + msg->saved_busy_time = s->last_buf_full; + s->last_buf_full = tw_now(lp); + } + return; } + + msg->saved_vc = vcg; + terminal_plus_message_list* cur_entry = s->terminal_msgs[vcg]; + int data_size = s->params->chunk_size; + uint64_t num_chunks = cur_entry->msg.packet_size / s->params->chunk_size; + if (cur_entry->msg.packet_size < s->params->chunk_size) + num_chunks++; - int cur_indx = s->op_arr_size; - - s->sample_stat[cur_indx].terminal_id = s->terminal_id; - s->sample_stat[cur_indx].fin_chunks_sample = s->fin_chunks_sample; - s->sample_stat[cur_indx].data_size_sample = s->data_size_sample; - s->sample_stat[cur_indx].fin_hops_sample = s->fin_hops_sample; - s->sample_stat[cur_indx].fin_chunks_time = s->fin_chunks_time; - s->sample_stat[cur_indx].busy_time_sample = s->busy_time_sample; - s->sample_stat[cur_indx].end_time = tw_now(lp); - s->sample_stat[cur_indx].fwd_events = s->fwd_events; - s->sample_stat[cur_indx].rev_events = s->rev_events; + tw_stime delay = s->params->cn_delay; + if ((cur_entry->msg.packet_size < s->params->chunk_size) && (cur_entry->msg.chunk_id == num_chunks - 1)) { + data_size = cur_entry->msg.packet_size % s->params->chunk_size; + delay = bytes_to_ns(cur_entry->msg.packet_size % s->params->chunk_size, s->params->cn_bandwidth); + } - s->op_arr_size++; - s->fin_chunks_sample = 0; - s->data_size_sample = 0; - s->fin_hops_sample = 0; - s->fwd_events = 0; - s->rev_events = 0; - s->fin_chunks_time = 0; - s->busy_time_sample = 0; -} + s->qos_data[vcg] += data_size; -void dragonfly_plus_sample_fin(terminal_state 
*s, tw_lp *lp) -{ - (void) lp; + msg->saved_available_time = s->terminal_available_time; + msg->num_rngs++; + ts = g_tw_lookahead + delay + tw_rand_unif(lp->rng); - if (!g_tw_mynode) { - /* write metadata file */ - char meta_fname[64]; - sprintf(meta_fname, "dragonfly-cn-sampling.meta"); + s->terminal_available_time = maxd(s->terminal_available_time, tw_now(lp)); + s->terminal_available_time += ts; - FILE *fp = fopen(meta_fname, "w"); - fprintf( - fp, - "Compute node sample format\nterminal_id (tw_lpid) \nfinished chunks (long)" - "\ndata size per sample (long) \nfinished hops (double) \ntime to finish chunks (double)" - "\nbusy time (double)\nsample end time(double) \nforward events (long) \nreverse events (long)"); - fclose(fp); + ts = s->terminal_available_time - tw_now(lp); + // TODO: be annotation-aware + codes_mapping_get_lp_info(lp->gid, lp_group_name, &mapping_grp_id, NULL, &mapping_type_id, NULL, + &mapping_rep_id, &mapping_offset); + codes_mapping_get_lp_id(lp_group_name, LP_CONFIG_NM_ROUT, NULL, 1, s->router_id / num_routers_per_mgrp, + s->router_id % num_routers_per_mgrp, &router_id); + // printf("\n Local router id %d global router id %d ", s->router_id, router_id); + // we are sending an event to the router, so no method_event here + void *remote_event; + e = model_net_method_event_new(router_id, ts, lp, DRAGONFLY_PLUS_ROUTER, (void **) &m, &remote_event); + memcpy(m, &cur_entry->msg, sizeof(terminal_plus_message)); + if (m->remote_event_size_bytes) { + memcpy(remote_event, cur_entry->event_data, m->remote_event_size_bytes); } - char rt_fn[MAX_NAME_LENGTH]; - if (strncmp(cn_sample_file, "", 10) == 0) - sprintf(rt_fn, "dragonfly-cn-sampling-%ld.bin", g_tw_mynode); - else - sprintf(rt_fn, "%s-%ld.bin", cn_sample_file, g_tw_mynode); - - FILE *fp = fopen(rt_fn, "a"); - fseek(fp, sample_bytes_written, SEEK_SET); - fwrite(s->sample_stat, sizeof(struct dfly_cn_sample), s->op_arr_size, fp); - fclose(fp); + m->type = R_ARRIVE; + m->src_terminal_id = lp->gid; + 
m->vc_index = vcg; + m->last_hop = TERMINAL; + m->magic = router_magic_num; + m->path_type = -1; + m->local_event_size_bytes = 0; + m->intm_rtr_id = -1; + m->intm_group_id = -1; + m->dfp_upward_channel_flag = 0; + tw_event_send(e); - sample_bytes_written += (s->op_arr_size * sizeof(struct dfly_cn_sample)); -} -static void terminal_buf_update_rc(terminal_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp) -{ - int vcg = 0; - int num_qos_levels = s->params->num_qos_levels; + if (cur_entry->msg.chunk_id == num_chunks - 1 && (cur_entry->msg.local_event_size_bytes > 0)) { + msg->num_cll++; + tw_stime local_ts = codes_local_latency(lp); + tw_event *e_new = tw_event_new(cur_entry->msg.sender_lp, local_ts, lp); + void *m_new = tw_event_data(e_new); + void *local_event = (char *) cur_entry->event_data + cur_entry->msg.remote_event_size_bytes; + memcpy(m_new, local_event, cur_entry->msg.local_event_size_bytes); + tw_event_send(e_new); + } + + // s->packet_counter++; + s->vc_occupancy[vcg] += s->params->chunk_size; + cur_entry = return_head(s->terminal_msgs, s->terminal_msgs_tail, vcg); + rc_stack_push(lp, cur_entry, delete_terminal_plus_message_list, s->st); + s->terminal_length[vcg] -= s->params->chunk_size; + + int next_vcg = 0; + + if(num_qos_levels > 1) + next_vcg = get_next_vcg(s, bf, msg, lp); - for(int i = 0; i < msg->num_cll; i++) - codes_local_latency_reverse(lp); + cur_entry = NULL; + if(next_vcg >= 0) + cur_entry = s->terminal_msgs[next_vcg]; - if(num_qos_levels > 1) - vcg = get_vcg_from_category(msg); - - s->vc_occupancy[vcg] += s->params->chunk_size; - if (bf->c1) { + /* if there is another packet inline then schedule another send event */ + if (cur_entry != NULL && s->vc_occupancy[next_vcg] + s->params->chunk_size <= s->params->cn_vc_size) { + terminal_plus_message *m_new; + msg->num_rngs++; + ts += tw_rand_unif(lp->rng); + e = model_net_method_event_new(lp->gid, ts, lp, DRAGONFLY_PLUS, (void **) &m_new, NULL); + m_new->type = T_SEND; + m_new->magic = 
terminal_magic_num; + tw_event_send(e); + } + else { + /* If not then the LP will wait for another credit or packet generation */ + bf->c4 = 1; s->in_send_loop = 0; } - return; -} -/* update the compute node-router channel buffer */ -static void terminal_buf_update(terminal_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp) -{ - msg->num_cll = 0; - msg->num_rngs = 0; - - bf->c1 = 0; - bf->c2 = 0; - bf->c3 = 0; - int vcg = 0; - - if(s->params->num_qos_levels > 1) - vcg = get_vcg_from_category(msg); + if (s->issueIdle) { + bf->c5 = 1; + s->issueIdle = 0; + msg->num_rngs++; + ts += tw_rand_unif(lp->rng); + model_net_method_idle_event(ts, 0, lp); - msg->num_cll++; - tw_stime ts = codes_local_latency(lp); - s->vc_occupancy[vcg] -= s->params->chunk_size; + if (s->last_buf_full > 0.0) { + bf->c6 = 1; + msg->saved_total_time = s->busy_time; + msg->saved_busy_time = s->last_buf_full; + msg->saved_sample_time = s->busy_time_sample; - if (s->in_send_loop == 0 && s->terminal_msgs[vcg] != NULL) { - terminal_plus_message *m; - bf->c1 = 1; - tw_event *e = model_net_method_event_new(lp->gid, ts, lp, DRAGONFLY_PLUS, (void **) &m, NULL); - m->type = T_SEND; - m->magic = terminal_magic_num; - s->in_send_loop = 1; - tw_event_send(e); + s->busy_time += (tw_now(lp) - s->last_buf_full); + s->busy_time_sample += (tw_now(lp) - s->last_buf_full); + s->ross_sample.busy_time_sample += (tw_now(lp) - s->last_buf_full); + msg->saved_busy_time_ross = s->busy_time_ross_sample; + s->busy_time_ross_sample += (tw_now(lp) - s->last_buf_full); + s->last_buf_full = 0.0; + } } return; } -void terminal_plus_event(terminal_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp) +static void send_remote_event(terminal_state *s, terminal_plus_message *msg, tw_lp *lp, tw_bf *bf, + char *event_data, int remote_event_size) { - s->fwd_events++; - //*(int *)bf = (int)0; - assert(msg->magic == terminal_magic_num); - - rc_stack_gc(lp, s->st); - switch (msg->type) { - case T_GENERATE: - 
packet_generate(s, bf, msg, lp); - break; - - case T_ARRIVE: - packet_arrive(s, bf, msg, lp); - break; - - case T_SEND: - packet_send(s, bf, msg, lp); - break; + void *tmp_ptr = model_net_method_get_edata(DRAGONFLY_PLUS, msg); + // tw_stime ts = g_tw_lookahead + bytes_to_ns(msg->remote_event_size_bytes, (1/s->params->cn_bandwidth)); + msg->num_rngs++; + tw_stime ts = g_tw_lookahead + mpi_soft_overhead + tw_rand_unif(lp->rng); + if (msg->is_pull) { + bf->c4 = 1; + struct codes_mctx mc_dst = codes_mctx_set_global_direct(msg->sender_mn_lp); + struct codes_mctx mc_src = codes_mctx_set_global_direct(lp->gid); + int net_id = model_net_get_id(LP_METHOD_NM_TERM); - case T_BUFFER: - terminal_buf_update(s, bf, msg, lp); - break; - - case T_BANDWIDTH: - issue_bw_monitor_event_rc(s, bf, msg, lp); - break; + model_net_set_msg_param(MN_MSG_PARAM_START_TIME, MN_MSG_PARAM_START_TIME_VAL, &(msg->msg_start_time)); - default: - printf("\n LP %d Terminal message type not supported %d ", (int) lp->gid, msg->type); - tw_error(TW_LOC, "Msg type not supported"); + msg->event_rc = model_net_event_mctx(net_id, &mc_src, &mc_dst, msg->category, msg->sender_lp, + msg->pull_size, ts, remote_event_size, tmp_ptr, 0, NULL, lp); + } + else { + tw_event *e = tw_event_new(msg->final_dest_gid, ts, lp); + void *m_remote = tw_event_data(e); + memcpy(m_remote, event_data, remote_event_size); + tw_event_send(e); } + return; } -void dragonfly_plus_terminal_final(terminal_state *s, tw_lp *lp) +static void packet_arrive_rc(terminal_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp) { - model_net_print_stats(lp->gid, s->dragonfly_stats_array); + for(int i = 0; i < msg->num_rngs; i++) + tw_rand_reverse_unif(lp->rng); - int written = 0; - if (s->terminal_id == 0) { - written += sprintf(s->output_buf + written, "# Format < dest_type> "); + for(int i = 0; i < msg->num_cll; i++) + codes_local_latency_reverse(lp); + + if (bf->c31) { + s->packet_fin--; + packet_fin--; } - written += sprintf(s->output_buf + 
written, "\n%u %s %llu %s %s %llu %lf", - s->terminal_id, "T", s->router_id, "R", "CN", LLU(s->total_msg_size), s->busy_time); + if (msg->path_type == MINIMAL) + minimal_count--; + else + nonmin_count--; + // if (msg->path_type == NON_MINIMAL) + // nonmin_count--; - lp_io_write(lp->gid, (char*)"dragonfly-plus-link-stats", written, s->output_buf); + N_finished_chunks--; + s->finished_chunks--; + s->fin_chunks_sample--; + s->ross_sample.fin_chunks_sample--; + s->fin_chunks_ross_sample--; + total_hops -= msg->my_N_hop; + s->total_hops -= msg->my_N_hop; + s->fin_hops_sample -= msg->my_N_hop; + s->ross_sample.fin_hops_sample -= msg->my_N_hop; + s->fin_hops_ross_sample -= msg->my_N_hop; + s->fin_chunks_time = msg->saved_sample_time; + s->ross_sample.fin_chunks_time = msg->saved_sample_time; + s->fin_chunks_time_ross_sample = msg->saved_fin_chunks_ross; + s->total_time = msg->saved_avg_time; - if (s->terminal_id == 0) { - char meta_filename[64]; - sprintf(meta_filename, "dragonfly-plus-cn-stats.meta"); + struct qhash_head *hash_link = NULL; + struct dfly_qhash_entry *tmp = NULL; - FILE * fp = fopen(meta_filename, "w+"); - fprintf(fp, "# Format <# Flits/Packets finished> \n"); - } - - written = 0; - written += sprintf(s->output_buf2 + written, "%llu %llu %lf %lf %lf %lf %llu %lf\n", - lp->gid, s->terminal_id, s->total_time/s->finished_chunks, - s->busy_time, s->max_latency, s->min_latency, - s->finished_packets, (double)s->total_hops/s->finished_chunks); + struct dfly_hash_key key; + key.message_id = msg->message_id; + key.sender_id = msg->sender_lp; + hash_link = qhash_search(s->rank_tbl, &key); + tmp = qhash_entry(hash_link, struct dfly_qhash_entry, hash_link); - lp_io_write(lp->gid, (char*)"dragonfly-plus-cn-stats", written, s->output_buf2); + mn_stats *stat; + stat = model_net_find_stats(msg->category, s->dragonfly_stats_array); + stat->recv_time = msg->saved_rcv_time; - // if (s->terminal_id == 0) { - // char meta_filename[64]; - // sprintf(meta_filename, 
"dragonfly-msg-stats.meta"); + if (bf->c1) { + stat->recv_count--; + stat->recv_bytes -= msg->packet_size; + N_finished_packets--; + s->finished_packets--; + } - // FILE *fp = fopen(meta_filename, "w+"); - // fprintf(fp, - // "# Format <# Flits/Packets " - // "finished> \n"); - // } - // int written = 0; + if (bf->c22) { + s->max_latency = msg->saved_available_time; + } + if (bf->c7) { + // assert(!hash_link); + N_finished_msgs--; + s->finished_msgs--; + total_msg_sz -= msg->total_size; + s->total_msg_size -= msg->total_size; + s->data_size_sample -= msg->total_size; + s->ross_sample.data_size_sample -= msg->total_size; + s->data_size_ross_sample -= msg->total_size; - // written += sprintf(s->output_buf + written, "%llu %u %llu %lf %ld %lf %lf %lf %lf\n", LLU(lp->gid), - // s->terminal_id, LLU(s->total_msg_size), s->total_time / s->finished_chunks, - // s->finished_packets, (double) s->total_hops / s->finished_chunks, s->busy_time, - // s->max_latency, s->min_latency); + struct dfly_qhash_entry *d_entry_pop = (dfly_qhash_entry *) rc_stack_pop(s->st); + qhash_add(s->rank_tbl, &key, &(d_entry_pop->hash_link)); + s->rank_tbl_pop++; - // lp_io_write(lp->gid, (char *) "dragonfly-msg-stats", written, s->output_buf); + if (s->rank_tbl_pop >= DFLY_HASH_TABLE_SIZE) + tw_error(TW_LOC, "\n Exceeded allocated qhash size, increase hash size in dragonfly model"); - if (s->terminal_msgs[0] != NULL) - printf("[%llu] leftover terminal messages \n", LLU(lp->gid)); + hash_link = &(d_entry_pop->hash_link); + tmp = d_entry_pop; - // if(s->packet_gen != s->packet_fin) - // printf("\n generated %d finished %d ", s->packet_gen, s->packet_fin); + if (bf->c4) + model_net_event_rc2(lp, &msg->event_rc); + } - if (s->rank_tbl) - qhash_finalize(s->rank_tbl); + assert(tmp); + tmp->num_chunks--; - rc_stack_destroy(s->st); - free(s->vc_occupancy); - free(s->terminal_msgs); - free(s->terminal_msgs_tail); + if (bf->c5) { + qhash_del(hash_link); + free_tmp(tmp); + s->rank_tbl_pop--; + } + return; } 
-void dragonfly_plus_router_final(router_state *s, tw_lp *lp) +/* packet arrives at the destination terminal */ +static void packet_arrive(terminal_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp) { - // int max_gc_usage = 0; - // int min_gc_usage = INT_MAX; + // NIC aggregation - should this be a separate function? + // Trigger an event on receiving server - // int running_sum = 0; - // for(int i = 0; i < s->params->num_global_connections; i++) - // { - // int gc_val = s->gc_usage[i]; - // running_sum += gc_val; + if (msg->my_N_hop > s->params->max_hops_notify) + { + printf("Terminal received a packet with %d hops! (Notify on > than %d)\n",msg->my_N_hop, s->params->max_hops_notify); + } + + msg->num_rngs = 0; + msg->num_cll = 0; - // if (gc_val > max_gc_usage) - // max_gc_usage = gc_val; - // if (gc_val < min_gc_usage) - // min_gc_usage = gc_val; - // } - // double mean_gc_usage = (double) running_sum / (double) s->params->num_global_connections; + if (!s->rank_tbl) + s->rank_tbl = qhash_init(dragonfly_rank_hash_compare, dragonfly_hash_func, DFLY_HASH_TABLE_SIZE); - // if (s->dfp_router_type == SPINE) { - // printf("Router %d in group %d: Min GC Usage= %d Max GC Usage= %d Mean GC Usage= %.2f", s->router_id, s->router_id / s->params->num_routers, min_gc_usage, max_gc_usage, mean_gc_usage); - // printf("\t["); - // for(int i = 0; i < s->params->num_global_connections; i++) - // { - // printf("%d ",s->gc_usage[i]); - // } - // printf("]\n"); - // } + struct dfly_hash_key key; + key.message_id = msg->message_id; + key.sender_id = msg->sender_lp; -#if DEBUG_QOS - if(s->router_id == 0) - fclose(dragonfly_rtr_bw_log); -#endif + struct qhash_head *hash_link = NULL; + struct dfly_qhash_entry *tmp = NULL; - free(s->global_channel); - int i, j; - for (i = 0; i < s->params->radix; i++) { - for (j = 0; j < s->params->num_vcs; j++) { - if (s->queued_msgs[i][j] != NULL) { - printf("[%llu] leftover queued messages %d %d %d\n", LLU(lp->gid), i, j, - 
s->vc_occupancy[i][j]); - } - if (s->pending_msgs[i][j] != NULL) { - printf("[%llu] lefover pending messages %d %d\n", LLU(lp->gid), i, j); - } - } - } + hash_link = qhash_search(s->rank_tbl, &key); - rc_stack_destroy(s->st); + if (hash_link) + tmp = qhash_entry(hash_link, struct dfly_qhash_entry, hash_link); - const dragonfly_plus_param *p = s->params; - int written = 0; - int src_rel_id = s->router_id % p->num_routers; - int local_grp_id = s->router_id / p->num_routers; + uint64_t total_chunks = msg->total_size / s->params->chunk_size; - for( int d = 0; d < p->intra_grp_radix; d++) - { - if (d != src_rel_id) { - int dest_ab_id = local_grp_id * p->num_routers + d; - written += sprintf(s->output_buf + written, "\n%d %s %d %s %s %llu %lf", - s->router_id, - "R", - dest_ab_id, - "R", - "L", - s->link_traffic[d], - s->busy_time[d] ); - } - } + if (msg->total_size % s->params->chunk_size) + total_chunks++; - vector< Connection > my_global_links = s->connMan->get_connections_by_type(CONN_GLOBAL); - vector< Connection >::iterator it = my_global_links.begin(); + if (!total_chunks) + total_chunks = 1; - for(; it != my_global_links.end(); it++) + /*if(tmp) { - int dest_rtr_id = it->dest_gid; - int port_no = it->port; - assert(port_no >= 0 && port_no < p->radix); - written += sprintf(s->output_buf + written, "\n%d %s %d %s %s %llu %lf", - s->router_id, - "R", - dest_rtr_id, - "R", - "G", - s->link_traffic[port_no], - s->busy_time[port_no] ); - } - - sprintf(s->output_buf + written, "\n"); - lp_io_write(lp->gid, (char*)"dragonfly-plus-link-stats", written, s->output_buf); - - // /*MM: These statistics will need to be updated for dragonfly plus. - // * Especially the meta file information on router ports still have green - // * and black links. 
*/ - // const dragonfly_plus_param *p = s->params; - // int written = 0; - // if (!s->router_id) { - // written = - // sprintf(s->output_buf, "# Format "); - // written += sprintf(s->output_buf + written, "\n# Router ports in the order: %d Intra Links, %d Inter Links %d Terminal Links. Hyphens for Unconnected ports (No terminals on Spine routers)", p->intra_grp_radix, p->num_global_connections, p->num_cn); - // } - - // char router_type[10]; - // if (s->dfp_router_type == LEAF) - // strcpy(router_type,"LEAF"); - // else if(s->dfp_router_type == SPINE) - // strcpy(router_type,"SPINE"); + if(tmp->num_chunks >= total_chunks || tmp->num_chunks < 0) + { + //tw_output(lp, "\n invalid number of chunks %d for LP %ld ", tmp->num_chunks, lp->gid); + tw_lp_suspend(lp, 0, 0); + return; + } + }*/ + assert(lp->gid == msg->dest_terminal_id); - // written += sprintf(s->output_buf + written, "\n%s %llu %d %d", router_type, LLU(lp->gid), s->router_id / p->num_routers, s->router_id % p->num_routers); - // for (int d = 0; d < p->radix; d++) { - // bool printed_hyphen = false; - // ConnectionType port_type = s->connMan->get_port_type(d); - - // if (port_type == 0) { - // written += sprintf(s->output_buf + written, " -"); - // printed_hyphen = true; - // } - // if (printed_hyphen == false) - // written += sprintf(s->output_buf + written, " %lf", s->busy_time[d]); - // } + if (msg->packet_ID == LLU(TRACK_PKT) && msg->src_terminal_id == T_ID) + printf("\n Packet %llu arrived at lp %llu hops %d ", msg->packet_ID, LLU(lp->gid), msg->my_N_hop); - // sprintf(s->output_buf + written, "\n"); - // lp_io_write(lp->gid, (char *) "dragonfly-plus-router-stats", written, s->output_buf); + msg->num_rngs++; + tw_stime ts = g_tw_lookahead + s->params->cn_credit_delay + tw_rand_unif(lp->rng); - // written = 0; - // if (!s->router_id) { - // written = - // sprintf(s->output_buf2, "# Format "); - // written += sprintf(s->output_buf2 + written, "\n# Router ports in the order: %d Intra Links, %d Inter Links 
%d Terminal Links. Hyphens for Unconnected ports (No terminals on Spine routers)", p->intra_grp_radix, p->num_global_connections, p->num_cn); - // } - // written += sprintf(s->output_buf2 + written, "\n%s %llu %d %d", router_type, LLU(lp->gid), s->router_id / p->num_routers, s->router_id % p->num_routers); + // no method_event here - message going to router + tw_event *buf_e; + terminal_plus_message *buf_msg; + buf_e = + model_net_method_event_new(msg->intm_lp_id, ts, lp, DRAGONFLY_PLUS_ROUTER, (void **) &buf_msg, NULL); + buf_msg->magic = router_magic_num; + buf_msg->vc_index = msg->vc_index; + buf_msg->output_chan = msg->output_chan; + buf_msg->type = R_BUFFER; + tw_event_send(buf_e); - // for (int d = 0; d < p->radix; d++) { - // bool printed_hyphen = false; - // ConnectionType port_type = s->connMan->get_port_type(d); + bf->c1 = 0; + bf->c3 = 0; + bf->c4 = 0; + bf->c7 = 0; - // if (port_type == 0) { - // written += sprintf(s->output_buf2 + written, " -"); - // printed_hyphen = true; - // } - // if (printed_hyphen == false) - // written += sprintf(s->output_buf2 + written, " %lld", LLD(s->link_traffic[d])); - // } + /* Total overall finished chunks in simulation */ + N_finished_chunks++; + /* Finished chunks on a LP basis */ + s->finished_chunks++; + /* Finished chunks per sample */ + s->fin_chunks_sample++; + s->ross_sample.fin_chunks_sample++; + s->fin_chunks_ross_sample++; - // lp_io_write(lp->gid, (char *) "dragonfly-plus-router-traffic", written, s->output_buf2); -} + /* WE do not allow self messages through dragonfly */ + assert(lp->gid != msg->src_terminal_id); -static int get_min_hops_to_dest_from_conn(router_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp, Connection conn) -{ - int my_type = s->dfp_router_type; - int next_hops_type = dragonfly_plus_get_router_type(conn.dest_gid, s->params); + // Verify that the router that send the packet to this terminal is the router assigned to this terminal + int dest_router_id = 
dragonfly_plus_get_assigned_router_id(s->terminal_id, s->params); + int received_from_rel_id = codes_mapping_get_lp_relative_id(msg->intm_lp_id,0,0); + assert(dest_router_id == received_from_rel_id); - int dfp_dest_terminal_id = msg->dfp_dest_terminal_id; - int fdest_router_id = dragonfly_plus_get_assigned_router_id(dfp_dest_terminal_id, s->params); - int fdest_group_id = fdest_router_id / s->params->num_routers; + uint64_t num_chunks = msg->packet_size / s->params->chunk_size; + if (msg->packet_size < s->params->chunk_size) + num_chunks++; - if (msg->dfp_upward_channel_flag) - { - if (my_type == SPINE) - return 2; //Next Spine -> Leaf -> dest_term - else - return 3; //Next Spine -> Spine -> Leaf -> dest_term - } + if (msg->path_type == MINIMAL) + minimal_count++; + else + nonmin_count++; - if(conn.dest_group_id == fdest_group_id) { - if (next_hops_type == SPINE) - return 2; //Next Spine -> Leaf -> dest_term - else { - assert(next_hops_type == LEAF); - return 1; //Next Leaf -> dest_term - } - } - else { //next is not in final destination group - if (next_hops_type == SPINE) { - vector< Connection > cons_to_dest_group = connManagerList[conn.dest_gid].get_connections_to_group(fdest_group_id); - if (cons_to_dest_group.size() == 0) - return 5; //Next Spine -> Leaf -> Spine -> Spine -> Leaf -> dest_term - else - return 3; //Next Spine -> Spine -> Leaf -> dest_term - } - else { - assert(next_hops_type == LEAF); - return 4; //Next Leaf -> Spine -> Spine -> Leaf -> dest_term - } + if (msg->chunk_id == num_chunks - 1) { + bf->c31 = 1; + s->packet_fin++; + packet_fin++; } -} - -/** - * Scores a connection based on the metric provided in the function - * @param isMinimalPort a boolean variable used in the Gamma metric to pass whether a given port would lead to the destination in a minimal way - */ -static int dfp_score_connection(router_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp, Connection conn, conn_minimality_t c_minimality) -{ - int score = 0; //can't 
forget to initialize this to zero. - int port = conn.port; + // if (msg->path_type != MINIMAL) + // printf("\n Wrong message path type %d ", msg->path_type); - if (port == -1) { - if (scoring_preference == LOWER) - return INT_MAX; - else - return 0; - } + /* save the sample time */ + msg->saved_sample_time = s->fin_chunks_time; + s->fin_chunks_time += (tw_now(lp) - msg->travel_start_time); + s->ross_sample.fin_chunks_time += (tw_now(lp) - msg->travel_start_time); + msg->saved_fin_chunks_ross = s->fin_chunks_time_ross_sample; + s->fin_chunks_time_ross_sample += (tw_now(lp) - msg->travel_start_time); - switch(scoring) { - case ALPHA: //considers vc occupancy and queued count only LOWER SCORE IS BETTER - { - for(int k=0; k < s->params->num_vcs; k++) - { - score += s->vc_occupancy[port][k]; - } - score += s->queued_count[port]; - break; - } - case BETA: //consideres vc occupancy and queued count multiplied by the number of minimum hops to the destination LOWER SCORE IS BETTER - { - int base_score = 0; - for(int k=0; k < s->params->num_vcs; k++) - { - base_score += s->vc_occupancy[port][k]; - } - base_score += s->queued_count[port]; - score = base_score * get_min_hops_to_dest_from_conn(s, bf, msg, lp, conn); - break; - } - case GAMMA: //consideres vc occupancy and queue count but ports that follow a minimal path to fdest are biased 2:1 bonus by multiplying minimal by 2 HIGHER SCORE IS BETTER - { - score = s->params->max_port_score; //initialize this to max score. 
- int to_subtract = 0; - for(int k=0; k < s->params->num_vcs; k++) - { - to_subtract += s->vc_occupancy[port][k]; - } - to_subtract += s->queued_count[port]; - score -= to_subtract; + /* save the total time per LP */ + msg->saved_avg_time = s->total_time; + s->total_time += (tw_now(lp) - msg->travel_start_time); + total_hops += msg->my_N_hop; + s->total_hops += msg->my_N_hop; + s->fin_hops_sample += msg->my_N_hop; + s->ross_sample.fin_hops_sample += msg->my_N_hop; + s->fin_hops_ross_sample += msg->my_N_hop; - if (c_minimality == C_MIN) //the connection maintains the paths minimality - gets a bonus of 2x - score = score * 2; - break; - } - case DELTA: //consideres vc occupancy and queue count but ports that follow a minimal path to fdest are biased 2:1 through dividing minimal by 2 Lower SCORE IS BETTER - { - for(int k=0; k < s->params->num_vcs; k++) - { - score += s->vc_occupancy[port][k]; - } - score += s->queued_count[port]; + mn_stats *stat = model_net_find_stats(msg->category, s->dragonfly_stats_array); + msg->saved_rcv_time = stat->recv_time; + stat->recv_time += (tw_now(lp) - msg->travel_start_time); - if (c_minimality != C_MIN) - score = score * 2; - break; - } - default: - tw_error(TW_LOC, "Unsupported Scoring Protocol Error\n"); +#if DEBUG == 1 + if (msg->packet_ID == TRACK && msg->chunk_id == num_chunks - 1 && msg->message_id == TRACK_MSG) { + printf("(%lf) [Terminal %d] packet %lld has arrived \n", tw_now(lp), (int) lp->gid, msg->packet_ID); - } - return score; -} + printf("travel start time is %f\n", msg->travel_start_time); -static vector< Connection > dfp_select_two_connections(router_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp, vector< Connection > conns, short* rng_counter) -{ - if(conns.size() < 2) { - (*rng_counter)+=2; - tw_rand_integer(lp->rng,0,2); //ensure this function always uses two rngs - tw_rand_integer(lp->rng,0,2); - if(conns.size() == 1) - return conns; - if(conns.size() == 0) - return vector< Connection>(); + printf("My 
hop now is %d\n", msg->my_N_hop); } +#endif - int rand_sel_1, rand_sel_2_offset; + /* Now retreieve the number of chunks completed from the hash and update + * them */ + void *m_data_src = model_net_method_get_edata(DRAGONFLY_PLUS, msg); - int num_conns = conns.size(); + /* If an entry does not exist then create one */ + if (!tmp) { + bf->c5 = 1; + struct dfly_qhash_entry *d_entry = (dfly_qhash_entry *) calloc(1, sizeof(struct dfly_qhash_entry)); + d_entry->num_chunks = 0; + d_entry->key = key; + d_entry->remote_event_data = NULL; + d_entry->remote_event_size = 0; + qhash_add(s->rank_tbl, &key, &(d_entry->hash_link)); + s->rank_tbl_pop++; - (*rng_counter)+=2; - rand_sel_1 = tw_rand_integer(lp->rng, 0, num_conns-1); - rand_sel_2_offset = tw_rand_integer(lp->rng, 0, num_conns-1); //number of indices to count up from the previous selected one. Avoids selecting same one twice - int rand_sel_2 = (rand_sel_1 + rand_sel_2_offset) % num_conns; + if (s->rank_tbl_pop >= DFLY_HASH_TABLE_SIZE) + tw_error(TW_LOC, "\n Exceeded allocated qhash size, increase hash size in dragonfly model"); - vector< Connection > retVec; - retVec.push_back(conns[rand_sel_1]); - retVec.push_back(conns[rand_sel_2]); + hash_link = &(d_entry->hash_link); + tmp = d_entry; + } - return retVec; -} + assert(tmp); + tmp->num_chunks++; -static Connection get_absolute_best_connection_from_conns(router_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp, vector conns) -{ - msg->num_rngs+=2; - tw_rand_integer(lp->rng,0,1); - tw_rand_integer(lp->rng,0,1); + if (msg->chunk_id == num_chunks - 1) { + bf->c1 = 1; + stat->recv_count++; + stat->recv_bytes += msg->packet_size; - if (conns.size() == 0) { - Connection bad_conn; - bad_conn.src_gid = -1; - bad_conn.port = -1; - return bad_conn; + N_finished_packets++; + s->finished_packets++; } - if (conns.size() == 1) { - return conns[0]; + /* if its the last chunk of the packet then handle the remote event data */ + if (msg->remote_event_size_bytes > 0 && 
!tmp->remote_event_data) { + /* Retreive the remote event entry */ + tmp->remote_event_data = (char *) calloc(1, msg->remote_event_size_bytes); + assert(tmp->remote_event_data); + tmp->remote_event_size = msg->remote_event_size_bytes; + memcpy(tmp->remote_event_data, m_data_src, msg->remote_event_size_bytes); + } + if (s->min_latency > tw_now(lp) - msg->travel_start_time) { + s->min_latency = tw_now(lp) - msg->travel_start_time; } - int num_to_compare = conns.size(); + if (s->max_latency < tw_now(lp) - msg->travel_start_time) { + bf->c22 = 1; + msg->saved_available_time = s->max_latency; + s->max_latency = tw_now(lp) - msg->travel_start_time; + } + /* If all chunks of a message have arrived then send a remote event to the + * callee*/ + // assert(tmp->num_chunks <= total_chunks); - int scores[num_to_compare]; - int best_score_index = 0; - if (scoring_preference == LOWER) { - - int best_score = INT_MAX; - for(int i = 0; i < num_to_compare; i++) - { - scores[i] = dfp_score_connection(s, bf, msg, lp, conns[i], C_MIN); + if (tmp->num_chunks >= total_chunks) { + bf->c7 = 1; - if (scores[i] < best_score) { - best_score = scores[i]; - best_score_index = i; - } - } - } - else { - - int best_score = 0; - for(int i = 0; i < num_to_compare; i++) - { - scores[i] = dfp_score_connection(s, bf, msg, lp, conns[i], C_MIN); + s->data_size_sample += msg->total_size; + s->ross_sample.data_size_sample += msg->total_size; + s->data_size_ross_sample += msg->total_size; + N_finished_msgs++; + total_msg_sz += msg->total_size; + s->total_msg_size += msg->total_size; + s->finished_msgs++; - if (scores[i] > best_score) { - best_score = scores[i]; - best_score_index = i; - } + // assert(tmp->remote_event_data && tmp->remote_event_size > 0); + if (tmp->remote_event_data && tmp->remote_event_size > 0) { + bf->c8 = 1; + send_remote_event(s, msg, lp, bf, tmp->remote_event_data, tmp->remote_event_size); } + /* Remove the hash entry */ + qhash_del(hash_link); + rc_stack_push(lp, tmp, free_tmp, 
s->st); + s->rank_tbl_pop--; } - - return conns[best_score_index]; + return; } -//two rngs per call -//TODO this defaults to minimality of min, at time of implementation all connections in conns are of same minimality so their scores compared to each other don't matter on minimality -static Connection get_best_connection_from_conns(router_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp, vector conns) +static void terminal_buf_update_rc(terminal_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp) { - if (conns.size() == 0) { - msg->num_rngs+=2; - tw_rand_integer(lp->rng, 0, 2); - tw_rand_integer(lp->rng, 0, 2); - Connection bad_conn; - bad_conn.src_gid = -1; - bad_conn.port = -1; - return bad_conn; - } - if (conns.size() < 2) { - msg->num_rngs+=2; - tw_rand_integer(lp->rng, 0, 2); - tw_rand_integer(lp->rng, 0, 2); - return conns[0]; - } - int num_to_compare = 2; //TODO make this a configurable - vector< Connection > selected_conns = dfp_select_two_connections(s, bf, msg, lp, conns, &(msg->num_rngs)); + int vcg = 0; + int num_qos_levels = s->params->num_qos_levels; - int scores[num_to_compare]; - int best_score_index = 0; - if (scoring_preference == LOWER) { - - int best_score = INT_MAX; - for(int i = 0; i < num_to_compare; i++) - { - scores[i] = dfp_score_connection(s, bf, msg, lp, selected_conns[i], C_MIN); + for(int i = 0; i < msg->num_cll; i++) + codes_local_latency_reverse(lp); - if (scores[i] < best_score) { - best_score = scores[i]; - best_score_index = i; - } - } + if(num_qos_levels > 1) + vcg = get_vcg_from_category(msg); + + s->vc_occupancy[vcg] += s->params->chunk_size; + if (bf->c1) { + s->in_send_loop = 0; } - else { - - int best_score = 0; - for(int i = 0; i < num_to_compare; i++) - { - scores[i] = dfp_score_connection(s, bf, msg, lp, selected_conns[i], C_MIN); + return; +} +/* update the compute node-router channel buffer */ +static void terminal_buf_update(terminal_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp) +{ + 
msg->num_cll = 0; + msg->num_rngs = 0; + + bf->c1 = 0; + bf->c2 = 0; + bf->c3 = 0; + int vcg = 0; - if (scores[i] > best_score) { - best_score = scores[i]; - best_score_index = i; - } - } - } + if(s->params->num_qos_levels > 1) + vcg = get_vcg_from_category(msg); - return selected_conns[best_score_index]; -} + msg->num_cll++; + tw_stime ts = codes_local_latency(lp); + s->vc_occupancy[vcg] -= s->params->chunk_size; + if (s->in_send_loop == 0 && s->terminal_msgs[vcg] != NULL) { + terminal_plus_message *m; + bf->c1 = 1; + tw_event *e = model_net_method_event_new(lp->gid, ts, lp, DRAGONFLY_PLUS, (void **) &m, NULL); + m->type = T_SEND; + m->magic = terminal_magic_num; + s->in_send_loop = 1; + tw_event_send(e); + } + return; +} -//Returns a vector of connections that are legal dragonfly plus routes that specifically would not allow for a minimal connection to the specific router specified in get_possible_stops_to_specific_router() -//Be very wary of using this method, results may not make sense if possible_minimal_stops is not a vector of minimal next stops to fdest_rotuer_id -//Nonminimal specifically refers to any move that does not move directly toward the destination router -static vector< Connection > get_legal_nonminimal_stops(router_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp, vector< Connection > possible_minimal_stops, int fdest_router_id) +void dragonfly_plus_terminal_final(terminal_state *s, tw_lp *lp) { - int my_router_id = s->router_id; - int my_group_id = s->router_id / s->params->num_routers; - int origin_group_id = msg->origin_router_id / s->params->num_routers; - int fdest_group_id = fdest_router_id / s->params->num_routers; - bool in_intermediate_group = (my_group_id != origin_group_id) && (my_group_id != fdest_group_id); + dragonfly_total_time += s->total_time; //increment the PE level time counter + if (s->max_latency > dragonfly_max_latency) + dragonfly_max_latency = s->max_latency; //get maximum latency across all LPs on this PE - 
vector< Connection > possible_nonminimal_stops; - - if (my_group_id == origin_group_id) { //then we're just sending upward out of the group - if (s->dfp_router_type == LEAF) { //then any connections to spines that are not in possible_minimal_stops should be included - vector< Connection > conns_to_spines = s->connMan->get_connections_by_type(CONN_LOCAL); - possible_nonminimal_stops = set_difference_vectors(conns_to_spines, possible_minimal_stops); //get the complement of possible_minimal_stops - } - else if (s->dfp_router_type == SPINE) { //then we have to send via global connections that aren't to the dest group - vector< Connection > conns_to_other_groups = s->connMan->get_connections_by_type(CONN_GLOBAL); - possible_nonminimal_stops = set_difference_vectors(conns_to_other_groups, possible_minimal_stops); - } - } - else if (in_intermediate_group) { - if (msg->dfp_upward_channel_flag == 1) { //then we return an empty vector as the only legal moves are those that already exist in possible_minimal_stops - assert(possible_nonminimal_stops.size() == 0); - return possible_nonminimal_stops; - } - else if (s->dfp_router_type == LEAF) { //if we're a leaf in an intermediate group then the routing alg should have flipped the dfp_upward channel already but lets return empty just in case - assert(possible_nonminimal_stops.size() == 0); - return possible_nonminimal_stops; - } - else if (s->dfp_router_type == SPINE) { //then possible_minimal_stops will be the list of connections - assert(msg->dfp_upward_channel_flag == 0); - vector< Connection> conns_to_leaves = s->connMan->get_connections_by_type(CONN_LOCAL); - possible_nonminimal_stops = set_difference_vectors(conns_to_leaves, possible_minimal_stops); //get the complement of possible_minimal_stops - } - else { - tw_error(TW_LOC, "Impossible Error - Something's majorly wrong if this is tripped"); - } + model_net_print_stats(lp->gid, s->dragonfly_stats_array); + int written = 0; + if (s->terminal_id == 0) { + written += 
sprintf(s->output_buf + written, "# Format < dest_type> \n"); } - else if (my_group_id == fdest_group_id) { - if (s->params->dest_spine_consider_nonmin == true || s->params->dest_spine_consider_global_nonmin == true) { - if (s->dfp_router_type == SPINE) { - vector< Connection > poss_next_conns; + written += sprintf(s->output_buf + written, "%u %s %u %s %s %llu %lf %lu\n", + s->terminal_id, "T", s->router_id, "R", "CN", LLU(s->total_msg_size), s->busy_time, s->stalled_chunks); - if (s->params->dest_spine_consider_nonmin == true) { - vector< Connection > conns_to_leaves = s->connMan->get_connections_by_type(CONN_LOCAL); - for (int i = 0; i < conns_to_leaves.size(); i++) - { - if (conns_to_leaves[i].dest_gid != fdest_router_id) - poss_next_conns.push_back(conns_to_leaves[i]); - } - } - if (s->params->dest_spine_consider_global_nonmin == true) { - vector< Connection > conns_to_spines = s->connMan->get_connections_by_type(CONN_GLOBAL); - for (int i = 0; i < conns_to_spines.size(); i++) - { - if (conns_to_spines[i].dest_group_id != fdest_group_id && conns_to_spines[i].dest_group_id != origin_group_id) { - poss_next_conns.push_back(conns_to_spines[i]); - } - } - } + lp_io_write(lp->gid, (char*)"dragonfly-plus-link-stats", written, s->output_buf); - return poss_next_conns; - } - else { - assert(s->dfp_router_type == LEAF); - assert(possible_nonminimal_stops.size() == 0); //empty because a leaf in the destination group has no legal nonminimal moves - return possible_nonminimal_stops; - } + // if (s->terminal_id == 0) { + // char meta_filename[64]; + // sprintf(meta_filename, "dragonfly-plus-cn-stats.meta"); + // FILE * fp = fopen(meta_filename, "w+"); + // fprintf(fp, "# Format <# Flits/Packets finished> \n"); + // } - } - } - else { - tw_error(TW_LOC, "Invalid group classification\n"); + written = 0; + if(s->terminal_id == 0) + { + written += sprintf(s->output_buf2 + written, "# Format <# Packets finished> \n"); } + written += sprintf(s->output_buf2 + written, "%llu %u %d 
%llu %lf %lf %lf %ld %lf %lf\n", + LLU(lp->gid), s->terminal_id, s->total_gen_size, LLU(s->total_msg_size), s->total_time/s->finished_chunks, s->max_latency, s->min_latency, + s->finished_packets, (double)s->total_hops/s->finished_chunks, s->busy_time); - // assert(possible_nonminimal_stops.size() > 0); - return possible_nonminimal_stops; -} + + // written = 0; + // written += sprintf(s->output_buf2 + written, "%llu %u %lf %lf %lf %lf %ld %lf\n", + // LLU(lp->gid), s->terminal_id, s->total_time/s->finished_chunks, + // s->busy_time, s->max_latency, s->min_latency, + // s->finished_packets, (double)s->total_hops/s->finished_chunks); -//The term minimal in this case refers to "Moving toward destination router". If a packet is at an intermediate spine and that spine doesn't -//have a direct connection to the destinatino group, then there wouldn't be any "legal minimal stops". The packet would have to continue to -//some leaf in the group first. THIS DOES NOT INCLUDE REROUTING. The only time an intermediate leaf can send to an intermediate spine is if -//dfp_upward_channel_flag is 0. -static vector< Connection > get_legal_minimal_stops(router_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp, int fdest_router_id) -{ - int my_router_id = s->router_id; - int my_group_id = s->router_id / s->params->num_routers; - int origin_group_id = msg->origin_router_id / s->params->num_routers; - int fdest_group_id = fdest_router_id / s->params->num_routers; - if (my_group_id != fdest_group_id) { //then we're in the source or the intermediate group. 
- if (s->dfp_router_type == LEAF) { - vector< Connection> possible_next_conns_to_group; - set poss_router_id_set_to_group; - for(int i = 0; i < connectionList[my_group_id][fdest_group_id].size(); i++) - { - int poss_router_id = connectionList[my_group_id][fdest_group_id][i]; - // printf("%d\n",poss_router_id); - if (poss_router_id_set_to_group.count(poss_router_id) == 0) { //if we haven't added the connections from poss_router_id yet - vector< Connection > conns = s->connMan->get_connections_to_gid(poss_router_id, CONN_LOCAL); - poss_router_id_set_to_group.insert(poss_router_id); - possible_next_conns_to_group.insert(possible_next_conns_to_group.end(), conns.begin(), conns.end()); - } - } - return possible_next_conns_to_group; - } - else if (s->dfp_router_type == SPINE) { - return s->connMan->get_connections_to_group(fdest_group_id); - } - } - else { - assert(my_group_id == fdest_group_id); - if (s->dfp_router_type == SPINE) { - vector< Connection > possible_next_conns = s->connMan->get_connections_to_gid(fdest_router_id, CONN_LOCAL); - return possible_next_conns; - } - else { - assert(s->dfp_router_type == LEAF); - - if (my_router_id != fdest_router_id) { //then we're also the source group and we need to send to any spine in our group - assert(my_group_id == origin_group_id); - return s->connMan->get_connections_by_type(CONN_LOCAL); - } - else { //then we're the dest router - assert(my_router_id == fdest_router_id); - vector< Connection > empty; - return empty; - } - } - } - vector< Connection > empty; - return empty; -} + lp_io_write(lp->gid, (char*)"dragonfly-plus-cn-stats", written, s->output_buf2); + + // if (s->terminal_id == 0) { + // char meta_filename[64]; + // sprintf(meta_filename, "dragonfly-msg-stats.meta"); + // FILE *fp = fopen(meta_filename, "w+"); + // fprintf(fp, + // "# Format <# Flits/Packets " + // "finished> \n"); + // } + // int written = 0; -static Connection do_dfp_prog_adaptive_routing(router_state *s, tw_bf *bf, terminal_plus_message 
*msg, tw_lp *lp, int fdest_router_id) -{ - int my_router_id = s->router_id; - int my_group_id = s->router_id / s->params->num_routers; - int fdest_group_id = fdest_router_id / s->params->num_routers; - int origin_group_id = msg->origin_router_id / s->params->num_routers; - bool in_intermediate_group = (my_group_id != origin_group_id) && (my_group_id != fdest_group_id); - bool outside_source_group = (my_group_id != origin_group_id); - int adaptive_threshold = s->params->adaptive_threshold; - bool next_hop_is_global = false; + // written += sprintf(s->output_buf + written, "%llu %u %llu %lf %ld %lf %lf %lf %lf\n", LLU(lp->gid), + // s->terminal_id, LLU(s->total_msg_size), s->total_time / s->finished_chunks, + // s->finished_packets, (double) s->total_hops / s->finished_chunks, s->busy_time, + // s->max_latency, s->min_latency); - //The check for dest group local routing has already been completed at this point + // lp_io_write(lp->gid, (char *) "dragonfly-msg-stats", written, s->output_buf); - Connection nextStopConn; - vector< Connection > poss_min_next_stops = get_legal_minimal_stops(s, bf, msg, lp, fdest_router_id); - vector< Connection > poss_intm_next_stops = get_legal_nonminimal_stops(s, bf, msg, lp, poss_min_next_stops, fdest_router_id); + if (s->terminal_msgs[0] != NULL) + printf("[%llu] leftover terminal messages \n", LLU(lp->gid)); + + // if(s->packet_gen != s->packet_fin) + // printf("\n generated %d finished %d ", s->packet_gen, s->packet_fin); + + if (s->rank_tbl) + qhash_finalize(s->rank_tbl); - if ( (poss_min_next_stops.size() == 0) && (poss_intm_next_stops.size() == 0)) - tw_error(TW_LOC, "No possible next stops!!!"); + rc_stack_destroy(s->st); + free(s->vc_occupancy); + free(s->terminal_msgs); + free(s->terminal_msgs_tail); +} - Connection best_min_conn, best_intm_conn; - ConnectionType conn_type_of_mins, conn_type_of_intms; +void dragonfly_plus_router_final(router_state *s, tw_lp *lp) +{ + // int max_gc_usage = 0; + // int min_gc_usage = INT_MAX; - 
//determine if it's a global or local channel next - if (poss_min_next_stops.size() > 0) - conn_type_of_mins = poss_min_next_stops[0].conn_type; //they should all be the same - if (poss_intm_next_stops.size() > 0) - conn_type_of_intms = poss_intm_next_stops[0].conn_type; + // int running_sum = 0; + // for(int i = 0; i < s->params->num_global_connections; i++) + // { + // int gc_val = s->gc_usage[i]; + // running_sum += gc_val; - if (conn_type_of_mins == CONN_GLOBAL) - best_min_conn = get_best_connection_from_conns(s, bf, msg, lp, poss_min_next_stops); - else - best_min_conn = get_absolute_best_connection_from_conns(s, bf, msg, lp, poss_min_next_stops); + // if (gc_val > max_gc_usage) + // max_gc_usage = gc_val; + // if (gc_val < min_gc_usage) + // min_gc_usage = gc_val; + // } + // double mean_gc_usage = (double) running_sum / (double) s->params->num_global_connections; - if (conn_type_of_intms == CONN_GLOBAL) - best_intm_conn = get_best_connection_from_conns(s, bf, msg, lp, poss_intm_next_stops); - else - best_intm_conn = get_absolute_best_connection_from_conns(s, bf, msg, lp, poss_intm_next_stops); + // if (s->dfp_router_type == SPINE) { + // printf("Router %d in group %d: Min GC Usage= %d Max GC Usage= %d Mean GC Usage= %.2f", s->router_id, s->router_id / s->params->num_routers, min_gc_usage, max_gc_usage, mean_gc_usage); + // printf("\t["); + // for(int i = 0; i < s->params->num_global_connections; i++) + // { + // printf("%d ",s->gc_usage[i]); + // } + // printf("]\n"); + // } - //if the best is intermediate, encode the intermediate router id in the message, set path type to non minimal - int min_score = dfp_score_connection(s, bf, msg, lp, best_min_conn, C_MIN); - int intm_score = dfp_score_connection(s, bf, msg, lp, best_intm_conn, C_NONMIN); +#if DEBUG_QOS + if(s->router_id == 0) + fclose(dragonfly_rtr_bw_log); +#endif - bool route_to_fdest = false; - if (msg->dfp_upward_channel_flag == 1) { //then we need to route to fdest, no questions asked. 
- route_to_fdest = true; - } - else { - if (scoring_preference == LOWER) { - if (min_score <= adaptive_threshold) { - route_to_fdest = true; - } - else if (min_score <= intm_score) { - route_to_fdest = true; + free(s->global_channel); + int i, j; + for (i = 0; i < s->params->radix; i++) { + for (j = 0; j < s->params->num_vcs; j++) { + if (s->queued_msgs[i][j] != NULL) { + printf("[%llu] leftover queued messages %d %d %d\n", LLU(lp->gid), i, j, + s->vc_occupancy[i][j]); } - } - else { //HIGHER is better - if (adaptive_threshold > 0) - tw_error(TW_LOC, "Adaptive threshold not compatible with HIGHER score preference yet\n"); //TODO fix this - if (min_score >= intm_score) { - route_to_fdest = true; + if (s->pending_msgs[i][j] != NULL) { + printf("[%llu] lefover pending messages %d %d\n", LLU(lp->gid), i, j); } } } - if (msg->dfp_upward_channel_flag == 0) { //if the flag is 1, then only minimal hops are considered - if (s->params->source_leaf_consider_nonmin == false) { //then we aren't supposed to let the source leaves consider any routes that wouldn't also be minimal - if (my_group_id == origin_group_id) { - if (s->dfp_router_type == LEAF) - route_to_fdest = true; //we aren't supposed to consider nonmin routes as the source leaf with given config - } - } - if (s->params->int_spine_consider_min == false) { //then we aren't supposed to let spines in intermediate group route to minimal even if possible - if (in_intermediate_group) { - if (s->dfp_router_type == SPINE) { - route_to_fdest = false; - } - } - } - if (my_group_id == fdest_group_id && s->dfp_router_type == SPINE) { - if (s->params->dest_spine_consider_nonmin == false && s->params->dest_spine_consider_global_nonmin == false) - route_to_fdest = true; + rc_stack_destroy(s->st); + + const dragonfly_plus_param *p = s->params; + int written = 0; + int src_rel_id = s->router_id % p->num_routers; + int local_grp_id = s->router_id / p->num_routers; + + for( int d = 0; d < p->intra_grp_radix; d++) + { + if (d != 
src_rel_id) { + int dest_ab_id = local_grp_id * p->num_routers + d; + written += sprintf(s->output_buf + written, "\n%d %s %d %s %s %llu %lf %lu", + s->router_id, + "R", + dest_ab_id, + "R", + "L", + LLU(s->link_traffic[d]), + s->busy_time[d], + s->stalled_chunks[d]); } } - if (route_to_fdest && (poss_min_next_stops.size() == 0)) - route_to_fdest = false; - if (!route_to_fdest && (poss_intm_next_stops.size() == 0)) - route_to_fdest = true; - - if (route_to_fdest){ - if (in_intermediate_group == true) - msg->dfp_upward_channel_flag = 1; //if we're in an intermediate group and take a turn toward the dest group, we flip the flag! - if ( (my_group_id == fdest_group_id) && (s->dfp_router_type == LEAF) && (my_router_id != fdest_router_id) ) - msg->dfp_upward_channel_flag = 1; //if we're a dest leaf but not the fdest leaf we must route using vl 1 + vector< Connection > my_global_links = s->connMan->get_connections_by_type(CONN_GLOBAL); + vector< Connection >::iterator it = my_global_links.begin(); - nextStopConn = best_min_conn; - } - else { - nextStopConn = best_intm_conn; - msg->path_type = NON_MINIMAL; + for(; it != my_global_links.end(); it++) + { + int dest_rtr_id = it->dest_gid; + int port_no = it->port; + assert(port_no >= 0 && port_no < p->radix); + written += sprintf(s->output_buf + written, "\n%d %s %d %s %s %llu %lf %lu", + s->router_id, + "R", + dest_rtr_id, + "R", + "G", + LLU(s->link_traffic[port_no]), + s->busy_time[port_no], + s->stalled_chunks[port_no]); } - if (nextStopConn.port == -1) - tw_error(TW_LOC, "DFP Prog Adaptive Routing: No valid next hop was chosen\n"); + sprintf(s->output_buf + written, "\n"); + lp_io_write(lp->gid, (char*)"dragonfly-plus-link-stats", written, s->output_buf); - return nextStopConn; + // /*MM: These statistics will need to be updated for dragonfly plus. + // * Especially the meta file information on router ports still have green + // * and black links. 
*/ + // const dragonfly_plus_param *p = s->params; + // int written = 0; + // if (!s->router_id) { + // written = + // sprintf(s->output_buf, "# Format "); + // written += sprintf(s->output_buf + written, "\n# Router ports in the order: %d Intra Links, %d Inter Links %d Terminal Links. Hyphens for Unconnected ports (No terminals on Spine routers)", p->intra_grp_radix, p->num_global_connections, p->num_cn); + // } + + // char router_type[10]; + // if (s->dfp_router_type == LEAF) + // strcpy(router_type,"LEAF"); + // else if(s->dfp_router_type == SPINE) + // strcpy(router_type,"SPINE"); + + // written += sprintf(s->output_buf + written, "\n%s %llu %d %d", router_type, LLU(lp->gid), s->router_id / p->num_routers, s->router_id % p->num_routers); + // for (int d = 0; d < p->radix; d++) { + // bool printed_hyphen = false; + // ConnectionType port_type = s->connMan->get_port_type(d); + + // if (port_type == 0) { + // written += sprintf(s->output_buf + written, " -"); + // printed_hyphen = true; + // } + // if (printed_hyphen == false) + // written += sprintf(s->output_buf + written, " %lf", s->busy_time[d]); + // } + + // sprintf(s->output_buf + written, "\n"); + // lp_io_write(lp->gid, (char *) "dragonfly-plus-router-stats", written, s->output_buf); + + // written = 0; + // if (!s->router_id) { + // written = + // sprintf(s->output_buf2, "# Format "); + // written += sprintf(s->output_buf2 + written, "\n# Router ports in the order: %d Intra Links, %d Inter Links %d Terminal Links. 
Hyphens for Unconnected ports (No terminals on Spine routers)", p->intra_grp_radix, p->num_global_connections, p->num_cn); + // } + // written += sprintf(s->output_buf2 + written, "\n%s %llu %d %d", router_type, LLU(lp->gid), s->router_id / p->num_routers, s->router_id % p->num_routers); + + // for (int d = 0; d < p->radix; d++) { + // bool printed_hyphen = false; + // ConnectionType port_type = s->connMan->get_port_type(d); + + // if (port_type == 0) { + // written += sprintf(s->output_buf2 + written, " -"); + // printed_hyphen = true; + // } + // if (printed_hyphen == false) + // written += sprintf(s->output_buf2 + written, " %lld", LLD(s->link_traffic[d])); + // } + + // lp_io_write(lp->gid, (char *) "dragonfly-plus-router-traffic", written, s->output_buf2); + + // if (!g_tw_mynode) { + // if (s->router_id == 0) { + // if (PRINT_CONFIG) + // dragonfly_plus_print_params(s->params); + // } + // } } static Connection do_dfp_routing(router_state *s, @@ -3785,19 +3832,6 @@ static Connection do_dfp_routing(router_state *s, tw_error(TW_LOC, "do_dfp_routing(): No route chosen!\n"); } -static void do_dfp_routing_rc(router_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp, int fdest_router_id) -{ - // for(int i = 0; i < msg->num_cll; i++) { - // codes_local_latency_reverse(lp); - // } - - // for(int i = 0; i < msg->num_rngs; i++) { - // tw_rand_reverse_unif(lp->rng); - // } - int my_group_id = s->router_id / s->params->num_routers; - int fdest_group_id = fdest_router_id / s->params->num_routers; -} - static void router_verify_valid_receipt(router_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp) { if (msg->my_N_hop > s->params->max_hops_notify) @@ -3856,80 +3890,69 @@ static void router_verify_valid_receipt(router_state *s, tw_bf *bf, terminal_plu assert(has_valid_connection); } -static int get_next_router_vcg(router_state * s, tw_bf * bf, terminal_plus_message * msg, tw_lp * lp) +/*MM: This will also be used as is. 
This is meant to sent a credit back to the + * sending router. */ +/*When a packet is sent from the current router and a buffer slot becomes available, a credit is sent back to + * schedule another packet event*/ +static void router_credit_send(router_state *s, terminal_plus_message *msg, tw_lp *lp, int sq, short* rng_counter) { - int num_qos_levels = s->params->num_qos_levels; + tw_event *buf_e; + tw_stime ts; + terminal_plus_message *buf_msg; - int vcs_per_qos = s->params->num_vcs / num_qos_levels; - int output_port = msg->vc_index; - int vcg = 0; - int base_limit = 0; - - int chunk_size = s->params->chunk_size; - int bw_consumption[num_qos_levels]; - /* First make sure the bandwidth consumptions are up to date. */ - if(BW_MONITOR == 1) - { - for(int k = 0; k < num_qos_levels; k++) - { - if(s->qos_status[output_port][k] != Q_OVERBW) - { - bw_consumption[k] = get_rtr_bandwidth_consumption(s, k, output_port); - if(bw_consumption[k] > s->params->qos_bandwidths[k]) - { -// printf("\n Router %d QoS %d exceeded allowed bandwidth %d ", s->router_id, k, bw_consumption[k]); - if(k == 0) - msg->qos_reset1 = 1; - else if(k == 1) - msg->qos_reset2 = 1; + int dest = 0, type = R_BUFFER; + int is_terminal = 0; + double credit_delay; + + const dragonfly_plus_param *p = s->params; + + // Notify sender terminal about available buffer space + if (msg->last_hop == TERMINAL) { + dest = msg->src_terminal_id; + type = T_BUFFER; + is_terminal = 1; + credit_delay = p->cn_credit_delay; + } + else if (msg->last_hop == GLOBAL) { + dest = msg->intm_lp_id; + credit_delay = p->global_credit_delay; + } + else if (msg->last_hop == LOCAL) { + dest = msg->intm_lp_id; + credit_delay = p->local_credit_delay; + } + else + printf("\n Invalid message type"); - s->qos_status[output_port][k] = Q_OVERBW; - } - } - } - int vc_size = s->params->global_vc_size; - if(output_port < s->params->intra_grp_radix) - vc_size = s->params->local_vc_size; + (*rng_counter)++; + ts = g_tw_lookahead + credit_delay + 
tw_rand_unif(lp->rng); - /* TODO: If none of the vcg is exceeding bandwidth limit then select high - * priority traffic first. */ - for(int i = 0; i < num_qos_levels; i++) - { - if(s->qos_status[output_port][i] == Q_ACTIVE) - { - int base_limit = i * vcs_per_qos; - for(int k = base_limit; k < base_limit + vcs_per_qos; k ++) - { - if(s->pending_msgs[output_port][k] != NULL) - return k; - } - } - } - } - - /* All vcgs are exceeding their bandwidth limits*/ - msg->last_saved_qos = s->last_qos_lvl[output_port]; - int next_rr_vcg = (s->last_qos_lvl[output_port] + 1) % num_qos_levels; + if (is_terminal) { + buf_e = model_net_method_event_new(dest, ts, lp, DRAGONFLY_PLUS, (void **) &buf_msg, NULL); + buf_msg->magic = terminal_magic_num; + } + else { + buf_e = model_net_method_event_new(dest, ts, lp, DRAGONFLY_PLUS_ROUTER, (void **) &buf_msg, NULL); + buf_msg->magic = router_magic_num; + } - for(int i = 0; i < num_qos_levels; i++) - { - base_limit = next_rr_vcg * vcs_per_qos; - for(int k = base_limit; k < base_limit + vcs_per_qos; k++) - { - if(s->pending_msgs[output_port][k] != NULL) - { - if(msg->last_saved_qos < 0) - msg->last_saved_qos = s->last_qos_lvl[output_port]; + buf_msg->origin_router_id = s->router_id; + if (sq == -1) { + buf_msg->vc_index = msg->vc_index; + buf_msg->output_chan = msg->output_chan; + } + else { + buf_msg->vc_index = msg->saved_vc; + buf_msg->output_chan = msg->saved_channel; + } - s->last_qos_lvl[output_port] = next_rr_vcg; - return k; - } - } - next_rr_vcg = (next_rr_vcg + 1) % num_qos_levels; - assert(next_rr_vcg < 2); - } - return -1; + strcpy(buf_msg->category, msg->category); + buf_msg->type = type; + + tw_event_send(buf_e); + return; } + static void router_packet_receive_rc(router_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp) { router_rev_ecount++; @@ -3961,6 +3984,11 @@ static void router_packet_receive_rc(router_state *s, tw_bf *bf, terminal_plus_m } } if (bf->c4) { + s->stalled_chunks[output_port]--; + if(bf->c22) + { + 
s->last_buf_full[output_port] = msg->saved_busy_time; + } delete_terminal_plus_message_list( return_tail(s->queued_msgs[output_port], s->queued_msgs_tail[output_port], output_chan)); s->queued_count[output_port] -= s->params->chunk_size; @@ -4022,6 +4050,7 @@ static void router_packet_receive(router_state *s, tw_bf *bf, terminal_plus_mess } Connection next_stop_conn = do_dfp_routing(s, bf, &(cur_chunk->msg), lp, dest_router_id); + msg->num_rngs += (cur_chunk->msg).num_rngs; //make sure we're counting the rngs called during do_dfp_routing() if (s->connMan->is_any_connection_to(next_stop_conn.dest_gid) == false) tw_error(TW_LOC, "Router %d does not have a connection to chosen destination %d\n", s->router_id, next_stop_conn.dest_gid); @@ -4123,11 +4152,24 @@ static void router_packet_receive(router_state *s, tw_bf *bf, terminal_plus_mess } else { bf->c4 = 1; + s->stalled_chunks[output_port]++; cur_chunk->msg.saved_vc = msg->vc_index; cur_chunk->msg.saved_channel = msg->output_chan; append_to_terminal_plus_message_list(s->queued_msgs[output_port], s->queued_msgs_tail[output_port], output_chan, cur_chunk); s->queued_count[output_port] += s->params->chunk_size; + + //THIS WAS REMOVED WHEN QOS WAS INSTITUTED - READDED 5/20/19 + /* a check for pending msgs is non-empty then we dont set anything. If + * that is empty then we check if last_buf_full is set or not. If already + * set then we don't overwrite it. If two packets arrive next to each other + * then the first person should be setting it. 
*/ + if(s->pending_msgs[output_port][output_chan] == NULL && s->last_buf_full[output_port] == 0.0) + { + bf->c22 = 1; + msg->saved_busy_time = s->last_buf_full[output_port]; + s->last_buf_full[output_port] = tw_now(lp); + } } msg->saved_vc = output_port; @@ -4230,7 +4272,7 @@ static void router_packet_send(router_state *s, tw_bf *bf, terminal_plus_message msg->num_rngs = 0; int num_qos_levels = s->params->num_qos_levels; - int output_chan = get_next_router_vcg(s, bf, msg, lp); + int output_chan = get_next_router_vcg(s, bf, msg, lp); //includes default output_chan setting functionality msg->saved_vc = output_port; msg->saved_channel = output_chan; @@ -4238,7 +4280,7 @@ static void router_packet_send(router_state *s, tw_bf *bf, terminal_plus_message if(output_chan < 0) { bf->c1 = 1; s->in_send_loop[output_port] = 0; - if(s->queued_count[output_port] && !s->last_buf_full[output_port]) + if(s->queued_count[output_port] && !s->last_buf_full[output_port]) //10-31-19, not sure why this was added here with the qos stuff { bf->c2 = 1; msg->saved_busy_time = s->last_buf_full[output_port]; @@ -4251,7 +4293,7 @@ static void router_packet_send(router_state *s, tw_bf *bf, terminal_plus_message assert(cur_entry != NULL); - if(s->last_buf_full[output_port]) + if(s->last_buf_full[output_port]) //10-31-19, same here as above comment { bf->c8 = 1; msg->saved_rcv_time = s->busy_time[output_port]; @@ -4290,7 +4332,7 @@ static void router_packet_send(router_state *s, tw_bf *bf, terminal_plus_message double bytetime = delay; if (cur_entry->msg.packet_size == 0) - bytetime = bytes_to_ns(CREDIT_SIZE, bandwidth); + bytetime = bytes_to_ns(s->params->credit_size, bandwidth); if ((cur_entry->msg.packet_size < s->params->chunk_size) && (cur_entry->msg.chunk_id == num_chunks - 1)) bytetime = bytes_to_ns(cur_entry->msg.packet_size % s->params->chunk_size, bandwidth); @@ -4405,6 +4447,7 @@ static void router_buf_update_rc(router_state *s, tw_bf *bf, terminal_plus_messa s->busy_time[indx] = 
msg->saved_rcv_time; s->busy_time_sample[indx] = msg->saved_sample_time; s->ross_rsample.busy_time[indx] = msg->saved_sample_time; + s->busy_time_ross_sample[indx] = msg->saved_busy_time_ross; s->last_buf_full[indx] = msg->saved_busy_time; } if (bf->c1) { @@ -4434,9 +4477,11 @@ static void router_buf_update(router_state *s, tw_bf *bf, terminal_plus_message msg->saved_rcv_time = s->busy_time[indx]; msg->saved_busy_time = s->last_buf_full[indx]; msg->saved_sample_time = s->busy_time_sample[indx]; + msg->saved_busy_time_ross = s->busy_time_ross_sample[indx]; s->busy_time[indx] += (tw_now(lp) - s->last_buf_full[indx]); s->busy_time_sample[indx] += (tw_now(lp) - s->last_buf_full[indx]); s->ross_rsample.busy_time[indx] += (tw_now(lp) - s->last_buf_full[indx]); + s->busy_time_ross_sample[indx] += (tw_now(lp) - s->last_buf_full[indx]); s->last_buf_full[indx] = 0.0; } if (s->queued_msgs[indx][output_chan] != NULL) { @@ -4466,6 +4511,40 @@ static void router_buf_update(router_state *s, tw_bf *bf, terminal_plus_message return; } +void terminal_plus_event(terminal_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp) +{ + s->fwd_events++; + //*(int *)bf = (int)0; + assert(msg->magic == terminal_magic_num); + + rc_stack_gc(lp, s->st); + switch (msg->type) { + case T_GENERATE: + packet_generate(s, bf, msg, lp); + break; + + case T_ARRIVE: + packet_arrive(s, bf, msg, lp); + break; + + case T_SEND: + packet_send(s, bf, msg, lp); + break; + + case T_BUFFER: + terminal_buf_update(s, bf, msg, lp); + break; + + case T_BANDWIDTH: + issue_bw_monitor_event_rc(s, bf, msg, lp); + break; + + default: + printf("\n LP %d Terminal message type not supported %d ", (int) lp->gid, msg->type); + tw_error(TW_LOC, "Msg type not supported"); + } +} + void router_plus_event(router_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp) { s->fwd_events++; @@ -4567,7 +4646,7 @@ tw_lptype dragonfly_plus_lps[] = { sizeof(terminal_state), }, { - (init_f) router_plus_setup, + (init_f) 
router_plus_init, (pre_run_f) NULL, (event_f) router_plus_event, (revent_f) router_plus_rc_event_handler, @@ -4600,202 +4679,296 @@ static void router_plus_register(tw_lptype *base_type) lp_type_register(LP_CONFIG_NM_ROUT, base_type); } -/* ROSS Instrumentation layer */ -// virtual time sampling callback - router forward -static void ross_dfly_plus_rsample_fn(router_state * s, tw_bf * bf, tw_lp * lp, struct dfly_router_sample *sample) +/* Routing Functions */ +static int get_min_hops_to_dest_from_conn(router_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp, Connection conn) { - (void)lp; - (void)bf; - - const dragonfly_plus_param * p = s->params; - int i = 0; + int my_type = s->dfp_router_type; + int next_hops_type = dragonfly_plus_get_router_type(conn.dest_gid, s->params); - sample->router_id = s->router_id; - sample->end_time = tw_now(lp); - sample->fwd_events = s->fwd_events; - sample->rev_events = s->rev_events; - sample->busy_time = (tw_stime*)((&sample->rev_events) + 1); - sample->link_traffic_sample = (int64_t*)((&sample->busy_time[0]) + p->radix); + int dfp_dest_terminal_id = msg->dfp_dest_terminal_id; + int fdest_router_id = dragonfly_plus_get_assigned_router_id(dfp_dest_terminal_id, s->params); + int fdest_group_id = fdest_router_id / s->params->num_routers; - for(; i < p->radix; i++) + if (msg->dfp_upward_channel_flag) { - sample->busy_time[i] = s->ross_rsample.busy_time[i]; - sample->link_traffic_sample[i] = s->ross_rsample.link_traffic_sample[i]; + if (my_type == SPINE) + return 2; //Next Spine -> Leaf -> dest_term + else + return 3; //Next Spine -> Spine -> Leaf -> dest_term } - /* clear up the current router stats */ - s->fwd_events = 0; - s->rev_events = 0; - - for( i = 0; i < p->radix; i++) - { - s->ross_rsample.busy_time[i] = 0; - s->ross_rsample.link_traffic_sample[i] = 0; + if(conn.dest_group_id == fdest_group_id) { + if (next_hops_type == SPINE) + return 2; //Next Spine -> Leaf -> dest_term + else { + assert(next_hops_type == LEAF); + 
return 1; //Next Leaf -> dest_term + } + } + else { //next is not in final destination group + if (next_hops_type == SPINE) { + vector< Connection > cons_to_dest_group = connManagerList[conn.dest_gid].get_connections_to_group(fdest_group_id); + if (cons_to_dest_group.size() == 0) + return 5; //Next Spine -> Leaf -> Spine -> Spine -> Leaf -> dest_term + else + return 3; //Next Spine -> Spine -> Leaf -> dest_term + } + else { + assert(next_hops_type == LEAF); + return 4; //Next Leaf -> Spine -> Spine -> Leaf -> dest_term + } } } -// virtual time sampling callback - router reverse -static void ross_dfly_plus_rsample_rc_fn(router_state * s, tw_bf * bf, tw_lp * lp, struct dfly_router_sample *sample) +//Returns a vector of connections that are legal dragonfly plus routes that specifically would not allow for a minimal connection to the specific router specified in get_possible_stops_to_specific_router() +//Be very wary of using this method, results may not make sense if possible_minimal_stops is not a vector of minimal next stops to fdest_rotuer_id +//Nonminimal specifically refers to any move that does not move directly toward the destination router +static vector< Connection > get_legal_nonminimal_stops(router_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp, vector< Connection > possible_minimal_stops, int fdest_router_id) { - (void)lp; - (void)bf; - - const dragonfly_plus_param * p = s->params; - int i =0; + int my_router_id = s->router_id; + int my_group_id = s->router_id / s->params->num_routers; + int origin_group_id = msg->origin_router_id / s->params->num_routers; + int fdest_group_id = fdest_router_id / s->params->num_routers; + bool in_intermediate_group = (my_group_id != origin_group_id) && (my_group_id != fdest_group_id); - for(; i < p->radix; i++) - { - s->ross_rsample.busy_time[i] = sample->busy_time[i]; - s->ross_rsample.link_traffic_sample[i] = sample->link_traffic_sample[i]; + + vector< Connection > possible_nonminimal_stops; + + if 
(my_group_id == origin_group_id) { //then we're just sending upward out of the group + if (s->dfp_router_type == LEAF) { //then any connections to spines that are not in possible_minimal_stops should be included + vector< Connection > conns_to_spines = s->connMan->get_connections_by_type(CONN_LOCAL); + possible_nonminimal_stops = set_difference_vectors(conns_to_spines, possible_minimal_stops); //get the complement of possible_minimal_stops + } + else if (s->dfp_router_type == SPINE) { //then we have to send via global connections that aren't to the dest group + vector< Connection > conns_to_other_groups = s->connMan->get_connections_by_type(CONN_GLOBAL); + possible_nonminimal_stops = set_difference_vectors(conns_to_other_groups, possible_minimal_stops); + } } + else if (in_intermediate_group) { + if (msg->dfp_upward_channel_flag == 1) { //then we return an empty vector as the only legal moves are those that already exist in possible_minimal_stops + assert(possible_nonminimal_stops.size() == 0); + return possible_nonminimal_stops; + } + else if (s->dfp_router_type == LEAF) { //if we're a leaf in an intermediate group then the routing alg should have flipped the dfp_upward channel already but lets return empty just in case + assert(possible_nonminimal_stops.size() == 0); + return possible_nonminimal_stops; + } + else if (s->dfp_router_type == SPINE) { //then possible_minimal_stops will be the list of connections + assert(msg->dfp_upward_channel_flag == 0); + vector< Connection> conns_to_leaves = s->connMan->get_connections_by_type(CONN_LOCAL); + possible_nonminimal_stops = set_difference_vectors(conns_to_leaves, possible_minimal_stops); //get the complement of possible_minimal_stops + } + else { + tw_error(TW_LOC, "Impossible Error - Something's majorly wrong if this is tripped"); + } - s->fwd_events = sample->fwd_events; - s->rev_events = sample->rev_events; -} + } + else if (my_group_id == fdest_group_id) { + if (s->params->dest_spine_consider_nonmin == true || 
s->params->dest_spine_consider_global_nonmin == true) { + if (s->dfp_router_type == SPINE) { + vector< Connection > poss_next_conns; -// virtual time sampling callback - terminal forward -static void ross_dfly_plus_sample_fn(terminal_state * s, tw_bf * bf, tw_lp * lp, struct dfly_cn_sample *sample) -{ - (void)lp; - (void)bf; - - sample->terminal_id = s->terminal_id; - sample->fin_chunks_sample = s->ross_sample.fin_chunks_sample; - sample->data_size_sample = s->ross_sample.data_size_sample; - sample->fin_hops_sample = s->ross_sample.fin_hops_sample; - sample->fin_chunks_time = s->ross_sample.fin_chunks_time; - sample->busy_time_sample = s->ross_sample.busy_time_sample; - sample->end_time = tw_now(lp); - sample->fwd_events = s->fwd_events; - sample->rev_events = s->rev_events; + if (s->params->dest_spine_consider_nonmin == true) { + vector< Connection > conns_to_leaves = s->connMan->get_connections_by_type(CONN_LOCAL); + for (int i = 0; i < conns_to_leaves.size(); i++) + { + if (conns_to_leaves[i].dest_gid != fdest_router_id) + poss_next_conns.push_back(conns_to_leaves[i]); + } + } + if (s->params->dest_spine_consider_global_nonmin == true) { + vector< Connection > conns_to_spines = s->connMan->get_connections_by_type(CONN_GLOBAL); + for (int i = 0; i < conns_to_spines.size(); i++) + { + if (conns_to_spines[i].dest_group_id != fdest_group_id && conns_to_spines[i].dest_group_id != origin_group_id) { + poss_next_conns.push_back(conns_to_spines[i]); + } + } + } + + return poss_next_conns; + } + else { + assert(s->dfp_router_type == LEAF); + assert(possible_nonminimal_stops.size() == 0); //empty because a leaf in the destination group has no legal nonminimal moves + return possible_nonminimal_stops; + } - s->ross_sample.fin_chunks_sample = 0; - s->ross_sample.data_size_sample = 0; - s->ross_sample.fin_hops_sample = 0; - s->fwd_events = 0; - s->rev_events = 0; - s->ross_sample.fin_chunks_time = 0; - s->ross_sample.busy_time_sample = 0; -} -// virtual time sampling 
callback - terminal reverse -static void ross_dfly_plus_sample_rc_fn(terminal_state * s, tw_bf * bf, tw_lp * lp, struct dfly_cn_sample *sample) -{ - (void)lp; - (void)bf; + } + } + else { + tw_error(TW_LOC, "Invalid group classification\n"); + } - s->ross_sample.busy_time_sample = sample->busy_time_sample; - s->ross_sample.fin_chunks_time = sample->fin_chunks_time; - s->ross_sample.fin_hops_sample = sample->fin_hops_sample; - s->ross_sample.data_size_sample = sample->data_size_sample; - s->ross_sample.fin_chunks_sample = sample->fin_chunks_sample; - s->fwd_events = sample->fwd_events; - s->rev_events = sample->rev_events; + // assert(possible_nonminimal_stops.size() > 0); + return possible_nonminimal_stops; } -// event tracing callback - used router and terminal LPs -void dfly_plus_event_collect(terminal_plus_message *m, tw_lp *lp, char *buffer, int *collect_flag) +//The term minimal in this case refers to "Moving toward destination router". If a packet is at an intermediate spine and that spine doesn't +//have a direct connection to the destinatino group, then there wouldn't be any "legal minimal stops". The packet would have to continue to +//some leaf in the group first. THIS DOES NOT INCLUDE REROUTING. The only time an intermediate leaf can send to an intermediate spine is if +//dfp_upward_channel_flag is 0. +static vector< Connection > get_legal_minimal_stops(router_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp, int fdest_router_id) { - (void)lp; - (void)collect_flag; + int my_router_id = s->router_id; + int my_group_id = s->router_id / s->params->num_routers; + int origin_group_id = msg->origin_router_id / s->params->num_routers; + int fdest_group_id = fdest_router_id / s->params->num_routers; - int type = (int) m->type; - memcpy(buffer, &type, sizeof(type)); + if (my_group_id != fdest_group_id) { //then we're in the source or the intermediate group. 
+ if (s->dfp_router_type == LEAF) { + vector< Connection> possible_next_conns_to_group; + set poss_router_id_set_to_group; + for(int i = 0; i < connectionList[my_group_id][fdest_group_id].size(); i++) + { + int poss_router_id = connectionList[my_group_id][fdest_group_id][i]; + // printf("%d\n",poss_router_id); + if (poss_router_id_set_to_group.count(poss_router_id) == 0) { //if we haven't added the connections from poss_router_id yet + vector< Connection > conns = s->connMan->get_connections_to_gid(poss_router_id, CONN_LOCAL); + poss_router_id_set_to_group.insert(poss_router_id); + possible_next_conns_to_group.insert(possible_next_conns_to_group.end(), conns.begin(), conns.end()); + } + } + return possible_next_conns_to_group; + } + else if (s->dfp_router_type == SPINE) { + return s->connMan->get_connections_to_group(fdest_group_id); + } + } + else { + assert(my_group_id == fdest_group_id); + if (s->dfp_router_type == SPINE) { + vector< Connection > possible_next_conns = s->connMan->get_connections_to_gid(fdest_router_id, CONN_LOCAL); + return possible_next_conns; + } + else { + assert(s->dfp_router_type == LEAF); + + if (my_router_id != fdest_router_id) { //then we're also the source group and we need to send to any spine in our group + assert(my_group_id == origin_group_id); + return s->connMan->get_connections_by_type(CONN_LOCAL); + } + else { //then we're the dest router + assert(my_router_id == fdest_router_id); + vector< Connection > empty; + return empty; + } + } + } + vector< Connection > empty; + return empty; } -// GVT-based and real time sampling callback for terminals -void dfly_plus_model_stat_collect(terminal_state *s, tw_lp *lp, char *buffer) + +static Connection do_dfp_prog_adaptive_routing(router_state *s, tw_bf *bf, terminal_plus_message *msg, tw_lp *lp, int fdest_router_id) { - (void)lp; + int my_router_id = s->router_id; + int my_group_id = s->router_id / s->params->num_routers; + int fdest_group_id = fdest_router_id / s->params->num_routers; + 
int origin_group_id = msg->origin_router_id / s->params->num_routers; + bool in_intermediate_group = (my_group_id != origin_group_id) && (my_group_id != fdest_group_id); + bool outside_source_group = (my_group_id != origin_group_id); + int adaptive_threshold = s->params->adaptive_threshold; + bool next_hop_is_global = false; - int index = 0; - tw_lpid id = 0; - long tmp = 0; - tw_stime tmp2 = 0; - - id = s->terminal_id; - memcpy(&buffer[index], &id, sizeof(id)); - index += sizeof(id); + //The check for dest group local routing has already been completed at this point - tmp = s->fin_chunks_ross_sample; - memcpy(&buffer[index], &tmp, sizeof(tmp)); - index += sizeof(tmp); - s->fin_chunks_ross_sample = 0; + Connection nextStopConn; + vector< Connection > poss_min_next_stops = get_legal_minimal_stops(s, bf, msg, lp, fdest_router_id); + vector< Connection > poss_intm_next_stops = get_legal_nonminimal_stops(s, bf, msg, lp, poss_min_next_stops, fdest_router_id); - tmp = s->data_size_ross_sample; - memcpy(&buffer[index], &tmp, sizeof(tmp)); - index += sizeof(tmp); - s->data_size_ross_sample = 0; + if ( (poss_min_next_stops.size() == 0) && (poss_intm_next_stops.size() == 0)) + tw_error(TW_LOC, "No possible next stops!!!"); - tmp = s->fin_hops_ross_sample; - memcpy(&buffer[index], &tmp, sizeof(tmp)); - index += sizeof(tmp); - s->fin_hops_ross_sample = 0; + Connection best_min_conn, best_intm_conn; + ConnectionType conn_type_of_mins, conn_type_of_intms; - tmp2 = s->fin_chunks_time_ross_sample; - memcpy(&buffer[index], &tmp2, sizeof(tmp2)); - index += sizeof(tmp2); - s->fin_chunks_time_ross_sample = 0; + //determine if it's a global or local channel next + if (poss_min_next_stops.size() > 0) + conn_type_of_mins = poss_min_next_stops[0].conn_type; //they should all be the same + if (poss_intm_next_stops.size() > 0) + conn_type_of_intms = poss_intm_next_stops[0].conn_type; - tmp2 = s->busy_time_ross_sample; - memcpy(&buffer[index], &tmp2, sizeof(tmp2)); - index += sizeof(tmp2); - 
s->busy_time_ross_sample = 0; + if (conn_type_of_mins == CONN_GLOBAL) + best_min_conn = get_best_connection_from_conns(s, bf, msg, lp, poss_min_next_stops); + else + best_min_conn = get_absolute_best_connection_from_conns(s, bf, msg, lp, poss_min_next_stops); - return; -} + if (conn_type_of_intms == CONN_GLOBAL) + best_intm_conn = get_best_connection_from_conns(s, bf, msg, lp, poss_intm_next_stops); + else + best_intm_conn = get_absolute_best_connection_from_conns(s, bf, msg, lp, poss_intm_next_stops); -// GVT-based and real time sampling callback for routers -void dfly_plus_router_model_stat_collect(router_state *s, tw_lp *lp, char *buffer) -{ - (void)lp; + //if the best is intermediate, encode the intermediate router id in the message, set path type to non minimal + int min_score = dfp_score_connection(s, bf, msg, lp, best_min_conn, C_MIN); + int intm_score = dfp_score_connection(s, bf, msg, lp, best_intm_conn, C_NONMIN); - const dragonfly_plus_param * p = s->params; - int i, index = 0; + bool route_to_fdest = false; + if (msg->dfp_upward_channel_flag == 1) { //then we need to route to fdest, no questions asked. 
+ route_to_fdest = true; + } + else { + if (scoring_preference == LOWER) { + if (min_score <= adaptive_threshold) { + route_to_fdest = true; + } + else if (min_score <= intm_score) { + route_to_fdest = true; + } + } + else { //HIGHER is better + if (adaptive_threshold > 0) + tw_error(TW_LOC, "Adaptive threshold not compatible with HIGHER score preference yet\n"); //TODO fix this + if (min_score >= intm_score) { + route_to_fdest = true; + } + } + } - tw_lpid id = 0; - tw_stime tmp = 0; - int64_t tmp2 = 0; + if (msg->dfp_upward_channel_flag == 0) { //if the flag is 1, then only minimal hops are considered + if (s->params->source_leaf_consider_nonmin == false) { //then we aren't supposed to let the source leaves consider any routes that wouldn't also be minimal + if (my_group_id == origin_group_id) { + if (s->dfp_router_type == LEAF) + route_to_fdest = true; //we aren't supposed to consider nonmin routes as the source leaf with given config + } + } + if (s->params->int_spine_consider_min == false) { //then we aren't supposed to let spines in intermediate group route to minimal even if possible + if (in_intermediate_group) { + if (s->dfp_router_type == SPINE) { + route_to_fdest = false; + } + } + } + if (my_group_id == fdest_group_id && s->dfp_router_type == SPINE) { + if (s->params->dest_spine_consider_nonmin == false && s->params->dest_spine_consider_global_nonmin == false) + route_to_fdest = true; + } + } - id = s->router_id; - memcpy(&buffer[index], &id, sizeof(id)); - index += sizeof(id); + if (route_to_fdest && (poss_min_next_stops.size() == 0)) + route_to_fdest = false; + if (!route_to_fdest && (poss_intm_next_stops.size() == 0)) + route_to_fdest = true; - for(i = 0; i < p->radix; i++) - { - tmp = s->busy_time_ross_sample[i]; - memcpy(&buffer[index], &tmp, sizeof(tmp)); - index += sizeof(tmp); - s->busy_time_ross_sample[i] = 0; + if (route_to_fdest){ + if (in_intermediate_group == true) + msg->dfp_upward_channel_flag = 1; //if we're in an intermediate group and 
take a turn toward the dest group, we flip the flag! + if ( (my_group_id == fdest_group_id) && (s->dfp_router_type == LEAF) && (my_router_id != fdest_router_id) ) + msg->dfp_upward_channel_flag = 1; //if we're a dest leaf but not the fdest leaf we must route using vl 1 - tmp2 = s->link_traffic_ross_sample[i]; - memcpy(&buffer[index], &tmp2, sizeof(tmp2)); - index += sizeof(tmp2); - s->link_traffic_ross_sample[i] = 0; + nextStopConn = best_min_conn; + } + else { + nextStopConn = best_intm_conn; + msg->path_type = NON_MINIMAL; } - return; -} - -static const st_model_types *dfly_plus_get_model_types(void) -{ - return(&dfly_plus_model_types[0]); -} - -static const st_model_types *dfly_plus_router_get_model_types(void) -{ - return(&dfly_plus_model_types[1]); -} -static void dfly_plus_register_model_types(st_model_types *base_type) -{ - st_model_type_register(LP_CONFIG_NM_TERM, base_type); -} + if (nextStopConn.port == -1) + tw_error(TW_LOC, "DFP Prog Adaptive Routing: No valid next hop was chosen\n"); -static void dfly_plus_router_register_model_types(st_model_types *base_type) -{ - st_model_type_register(LP_CONFIG_NM_ROUT, base_type); + return nextStopConn; } -/*** END of ROSS Instrumentation support */ extern "C" { /* data structure for dragonfly statistics */ diff --git a/src/networks/model-net/dragonfly.c b/src/networks/model-net/dragonfly.c index f647a1ec..eb5e81bb 100644 --- a/src/networks/model-net/dragonfly.c +++ b/src/networks/model-net/dragonfly.c @@ -27,7 +27,6 @@ #include #endif -#define CREDIT_SZ 8 #define MEAN_PROCESS 1.0 /* collective specific parameters */ @@ -138,7 +137,10 @@ struct dragonfly_param double cn_delay; double local_delay; double global_delay; - double credit_delay; + int credit_size; + double local_credit_delay; + double global_credit_delay; + double cn_credit_delay; double router_delay; }; @@ -640,7 +642,82 @@ static void dragonfly_read_config(const char * anno, dragonfly_param *params){ p->cn_delay = bytes_to_ns(p->chunk_size, 
p->cn_bandwidth); p->local_delay = bytes_to_ns(p->chunk_size, p->local_bandwidth); p->global_delay = bytes_to_ns(p->chunk_size, p->global_bandwidth); - p->credit_delay = bytes_to_ns(CREDIT_SZ, p->local_bandwidth); //assume 8 bytes packet + + //CREDIT DELAY CONFIGURATION LOGIC ------------ + int myRank; + MPI_Comm_rank(MPI_COMM_CODES, &myRank); + + rc = configuration_get_value_int(&config, "PARAMS", "credit_size", anno, &p->credit_size); + if (rc) { + p->credit_size = 8; + if(!myRank) + fprintf(stderr, "credit_size not specified, using default: %d\n", p->credit_size); + } + + double general_credit_delay; + int credit_delay_unset = configuration_get_value_double(&config, "PARAMS", "credit_delay", anno, &general_credit_delay); + int local_credit_delay_unset = configuration_get_value_double(&config, "PARAMS", "local_credit_delay", anno, &p->local_credit_delay); + int global_credit_delay_unset = configuration_get_value_double(&config, "PARAMS", "global_credit_delay", anno, &p->global_credit_delay); + int cn_credit_delay_unset = configuration_get_value_double(&config, "PARAMS", "cn_credit_delay", anno, &p->cn_credit_delay); + + int auto_credit_delay_flag; + rc = configuration_get_value_int(&config, "PARAMS", "auto_credit_delay", anno, &auto_credit_delay_flag); + if (rc) { + auto_credit_delay_flag = 0; + } + else { + if(!myRank && auto_credit_delay_flag) + fprintf(stderr, "auto_credit_delay flag enabled. All credit delays will be calculated based on their respective bandwidths\n"); + } + + //If the user specifies a general "credit_delay" AND any of the more specific credit delays, throw an error to make sure they correct their configuration + if (!credit_delay_unset && !(local_credit_delay_unset || global_credit_delay_unset || cn_credit_delay_unset)) + tw_error(TW_LOC, "\nCannot set both a general credit delay and specific (local/global/cn) credit delays. 
Check configuration file."); + + //If the user specifies ANY credit delays general or otherwise AND has the auto credit delay flag enabled, throw an error to make sure they correct the conflicting configuration + if ((!credit_delay_unset || !local_credit_delay_unset || !global_credit_delay_unset || !cn_credit_delay_unset) && auto_credit_delay_flag) + tw_error(TW_LOC, "\nCannot set both a credit delay (general or specific) and also enable auto credit delay calculation. Check Configuration file."); + + //If the user doesn't specify either general or specific credit delays - calculate credit delay based on local bandwidth. + //This is old legacy behavior that is left in to make sure that the credit delay configurations of old aren't semantically different + //Other possible way to program this would be to make each credit delay be set based on their respective bandwidths but this semantically + //changes the behavior of old configuration files. (although it would be more accurate) + if (credit_delay_unset && local_credit_delay_unset && global_credit_delay_unset && cn_credit_delay_unset && !auto_credit_delay_flag) { + p->local_credit_delay = bytes_to_ns(p->credit_size, p->local_bandwidth); + p->global_credit_delay = p->local_credit_delay; + p->cn_credit_delay = p->local_credit_delay; + if(!myRank) + fprintf(stderr, "no credit_delay specified - all credit delays set to %.2f\n",p->local_credit_delay); + } + //If the user doesn't specify a general credit delay but leaves any of the specific credit delay values unset, then we need to set those (the above conditional handles if none of them had been set) + else if (credit_delay_unset) { + if (local_credit_delay_unset) { + p->local_credit_delay = bytes_to_ns(p->credit_size, p->local_bandwidth); + if(!myRank && !auto_credit_delay_flag) //if the auto credit delay flag is true then we've already printed what we're going to do + fprintf(stderr, "local_credit_delay not specified, using calculation based on local bandwidth: 
%.2f\n", p->local_credit_delay); + } + if (global_credit_delay_unset) { + p->global_credit_delay = bytes_to_ns(p->credit_size, p->global_bandwidth); + if(!myRank && !auto_credit_delay_flag) + fprintf(stderr, "global_credit_delay not specified, using calculation based on global bandwidth: %.2f\n", p->global_credit_delay); + } + if (cn_credit_delay_unset) { + p->cn_credit_delay = bytes_to_ns(p->credit_size, p->cn_bandwidth); + if(!myRank && !auto_credit_delay_flag) + fprintf(stderr, "cn_credit_delay not specified, using calculation based on cn bandwidth: %.2f\n", p->cn_credit_delay); + } + } + //If the user specifies a general credit delay (but didn't specify any specific credit delays) then we set all specific credit delays to the general + else if (!credit_delay_unset) { + p->local_credit_delay = general_credit_delay; + p->global_credit_delay = general_credit_delay; + p->cn_credit_delay = general_credit_delay; + + if(!myRank) + fprintf(stderr, "general credit_delay specified - all credit delays set to %.2f\n",general_credit_delay); + } + //END CREDIT DELAY CONFIGURATION LOGIC ---------------- + } static void dragonfly_configure(){ @@ -1114,6 +1191,7 @@ static void router_credit_send(router_state * s, terminal_message * msg, int dest = 0, type = R_BUFFER; int is_terminal = 0; + double credit_delay; const dragonfly_param *p = s->params; @@ -1122,14 +1200,20 @@ static void router_credit_send(router_state * s, terminal_message * msg, dest = msg->src_terminal_id; type = T_BUFFER; is_terminal = 1; - } else if(msg->last_hop == GLOBAL) { + credit_delay = p->cn_credit_delay; + } + else if(msg->last_hop == GLOBAL) { dest = msg->intm_lp_id; - } else if(msg->last_hop == LOCAL) { + credit_delay = p->global_credit_delay; + } + else if(msg->last_hop == LOCAL) { dest = msg->intm_lp_id; - } else + credit_delay = p->local_credit_delay; + } + else printf("\n Invalid message type"); - ts = g_tw_lookahead + p->credit_delay + tw_rand_unif(lp->rng); + ts = g_tw_lookahead + credit_delay + 
tw_rand_unif(lp->rng); if (is_terminal) { buf_e = model_net_method_event_new(dest, ts, lp, DRAGONFLY, @@ -1631,7 +1715,7 @@ static void packet_arrive(terminal_state * s, tw_bf * bf, terminal_message * msg if(msg->packet_ID == LLU(TRACK_PKT)) printf("\n Packet %llu arrived at lp %llu hops %d", msg->packet_ID, LLU(lp->gid), msg->my_N_hop); - tw_stime ts = g_tw_lookahead + s->params->credit_delay + tw_rand_unif(lp->rng); + tw_stime ts = g_tw_lookahead + s->params->cn_credit_delay + tw_rand_unif(lp->rng); // no method_event here - message going to router tw_event * buf_e; diff --git a/src/networks/model-net/express-mesh.C b/src/networks/model-net/express-mesh.C index 20e0ceb1..1e36afd8 100644 --- a/src/networks/model-net/express-mesh.C +++ b/src/networks/model-net/express-mesh.C @@ -1438,7 +1438,7 @@ terminal_final( terminal_state * s, tw_lp * lp ) written = sprintf(s->output_buf, "# Format <# Flits/Packets finished> "); written += sprintf(s->output_buf + written, "\n %llu %u %llu %lf %ld %lf %lf", - LLU(lp->gid), s->terminal_id, s->total_msg_size, s->total_time, + LLU(lp->gid), s->terminal_id, LLU(s->total_msg_size), s->total_time, s->finished_packets, (double)s->total_hops/s->finished_chunks, s->busy_time); diff --git a/src/networks/model-net/fattree.c b/src/networks/model-net/fattree.c index 0089ad4e..eb1c49b5 100644 --- a/src/networks/model-net/fattree.c +++ b/src/networks/model-net/fattree.c @@ -2794,7 +2794,7 @@ void fattree_terminal_final( ft_terminal_state * s, tw_lp * lp ) written = sprintf(s->output_buf, "# Format <# Flits/Packets finished> \n"); written += sprintf(s->output_buf + written, "%llu %u %u %llu %lf %ld %lf %lf\n", - LLU(lp->gid), s->terminal_id, s->rail_id, s->total_msg_size, s->total_time, + LLU(lp->gid), s->terminal_id, s->rail_id, LLU(s->total_msg_size), s->total_time, s->finished_packets, (double)s->total_hops/s->finished_chunks, s->busy_time[0]); diff --git a/src/networks/model-net/slimfly.c b/src/networks/model-net/slimfly.c index 
056f7989..da122ec6 100644 --- a/src/networks/model-net/slimfly.c +++ b/src/networks/model-net/slimfly.c @@ -333,7 +333,7 @@ static void ross_slimfly_rsample_fn(router_state * s, tw_bf * bf, tw_lp * lp, st static void ross_slimfly_rsample_rc_fn(router_state * s, tw_bf * bf, tw_lp * lp, struct slimfly_router_sample *sample); int get_path_length_from_terminal(int src, int dest, const slimfly_param *p); void get_router_connections(int src_router_id, int num_global_channels, int num_local_channels, - int total_routers, int* local_channels, int* global_channels, int sf_type, slimfly_param * p); + int total_routers, int* local_channels, int* global_channels, int sf_type, const slimfly_param * p); st_model_types slimfly_model_types[] = { {(ev_trace_f) slimfly_event_collect, @@ -910,8 +910,6 @@ void slim_terminal_init( terminal_state * s, int num_routers = codes_mapping_get_lp_count(lp_group_name, 0 ,"modelnet_slimfly_router", s->anno, 0); - int num_routers_per_rep = codes_mapping_get_lp_count(lp_group_name, 1, LP_CONFIG_NM_ROUT, s->anno, 0); - int num_routers_per_rail = num_routers / s->params->ports_per_nic; #if MSG_TIMES @@ -2244,7 +2242,7 @@ void slimfly_router_final(router_state * s, * @param[in] *global_channels Integer array pointer for storing the global connections */ void get_router_connections(int src_router_id, int num_global_channels, int num_local_channels, - int total_routers, int* local_channels, int* global_channels, int sf_type, slimfly_param *p){ + int total_routers, int* local_channels, int* global_channels, int sf_type, const slimfly_param *p){ //Compute MMS router layout/connection graph int rid_s = src_router_id; // ID for source router int num_rails = 1; @@ -2953,7 +2951,7 @@ tw_lpid slim_get_next_stop(router_state * s, int intm_id) { // printf("slim get next stop\n"); - + (void)lp; (void)msg; (void)bf; diff --git a/src/workload/codes-workload-dump.c b/src/workload/codes-workload-dump.c index 66d410ab..18756bba 100644 --- 
a/src/workload/codes-workload-dump.c +++ b/src/workload/codes-workload-dump.c @@ -15,7 +15,7 @@ static char type[128] = {'\0'}; static darshan_params d_params = {"", 0}; static iolang_params i_params = {0, 0, "", ""}; static recorder_params r_params = {"", 0}; -static dumpi_trace_params du_params = {"", 0}; +static dumpi_trace_params du_params = {"", 0, 0}; static online_comm_params oc_params = {"", "", 0}; static checkpoint_wrkld_params c_params = {0, 0, 0, 0, 0}; static iomock_params im_params = {0, 0, 1, 0, 0, 0}; diff --git a/src/workload/codes-workload.c b/src/workload/codes-workload.c index 934066ce..190993d3 100644 --- a/src/workload/codes-workload.c +++ b/src/workload/codes-workload.c @@ -420,7 +420,6 @@ void codes_workload_print_op( case CODES_WK_SEND: fprintf(f, "op: app:%d rank:%d type:send " "src:%d dst:%d bytes:%"PRIu64" type:%d count:%d tag:%d " - "src:%d dst:%d bytes:%d type:%d count:%d tag:%d " "start:%.5e end:%.5e\n", app_id, rank, op->u.send.source_rank, op->u.send.dest_rank, diff --git a/src/workload/methods/codes-dumpi-trace-nw-wrkld.c b/src/workload/methods/codes-dumpi-trace-nw-wrkld.c index 8c6c1527..f32291da 100644 --- a/src/workload/methods/codes-dumpi-trace-nw-wrkld.c +++ b/src/workload/methods/codes-dumpi-trace-nw-wrkld.c @@ -36,7 +36,7 @@ #define UNDUMPI_CLOSE undumpi_close #endif -#define MAX_LENGTH_FILE 512 +#define MAX_LENGTH_FILE 1024 #define MAX_OPERATIONS 32768 #define DUMPI_IGNORE_DELAY 100 diff --git a/tests/download-traces.sh b/tests/download-traces.sh index 504ca38e..975cca41 100755 --- a/tests/download-traces.sh +++ b/tests/download-traces.sh @@ -2,6 +2,6 @@ FILE=/tmp/df_AMG_n27_dumpi/dumpi-2014.03.03.14.55.00-0000.bin if [ ! 
-f $FILE ]; then - wget https://portal.nersc.gov/project/CAL/doe-miniapps-mpi-traces/AMG/df_AMG_n27_dumpi.tar.gz + wget https://raw.githubusercontent.com/codes-org/codes-files/master/tests/df_AMG_n27_dumpi.tar.gz tar -xvf df_AMG_n27_dumpi.tar.gz -C /tmp fi diff --git a/tests/mapping_test.c b/tests/mapping_test.c index f648a17c..6542935a 100644 --- a/tests/mapping_test.c +++ b/tests/mapping_test.c @@ -75,7 +75,7 @@ static void init(state *ns, tw_lp *lp){ // output-based check - print out IDs, compare against expected - char tmp[128]; + char tmp[512]; if (ns->anno[0]=='\0') tmp[0] = '\0'; else diff --git a/tests/workload/codes-workload-test.c b/tests/workload/codes-workload-test.c index ef9e470e..7e48fa81 100644 --- a/tests/workload/codes-workload-test.c +++ b/tests/workload/codes-workload-test.c @@ -106,7 +106,7 @@ int main( return(-1); } - struct codes_workload_method dummy_method = {"foo", NULL, NULL, NULL, NULL, NULL}; + struct codes_workload_method dummy_method = {"foo", NULL, NULL, NULL, NULL, NULL, NULL, NULL}; codes_workload_add_method(&dummy_method); workload_set_params();