-
Notifications
You must be signed in to change notification settings - Fork 1
/
DE5BridgeTop.bsv
366 lines (315 loc) · 11.3 KB
/
DE5BridgeTop.bsv
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
// SPDX-License-Identifier: BSD-2-Clause
// This module implements a "bridge board", i.e. an FPGA board that acts
// as a proxy between a host PC (connected over PCIe) and an FPGA
// mesh (connected via a 10G link).
//
// The basic idea is that messages received from the host PC over PCIe
// are inserted onto the mesh's 10G network. Likewise, messages
// from the mesh's network are sent to the host PC over PCIe.
//
// The format of the data stream in the PC->FPGA direction is:
//
// 1. DA: Destination address (4 bytes)
// 2. NM: Number of messages that follow minus one (4 bytes)
// 3. FM: Number of flit payloads per message minus one (1 byte)
// 4. Padding (3 bytes)
// 5. Routing key (4 bytes; always present, but only used when DA selects key-based routing)
// 6. (NM+1)*(FM+1) flit payloads ((NM+1)*(FM+1)*BytesPerFlit bytes)
// 7. Goto step 1
//
// The format of the data stream in the FPGA->PC direction is simply
// raw flit payloads.
//
// This module assumes that BytesPerFlit is 16. This restriction
// should be removed in future, if necessary.
package DE5BridgeTop;
// ============================================================================
// Imports
// ============================================================================
import Globals :: *;
import DRAM :: *;
import Interface :: *;
import Queue :: *;
import Vector :: *;
import Mailbox :: *;
import Network :: *;
import Mac :: *;
import PCIeStream :: *;
import Socket :: *;
import ConfigReg :: *;
import JtagUart :: *;
import DebugLink :: *;
import IdleDetector :: *;
import FlitMerger :: *;
// ============================================================================
// Interface
// ============================================================================
`ifdef SIMULATE
// In simulation the top-level module has an Empty interface.
typedef Empty DE5BridgeTop;
`else
// Top-level interface of the bridge board when built for hardware.
interface DE5BridgeTop;
  // PCIe BAR
  interface PCIeBAR controlBAR;

  // Host PCIe bus (used for DMA to/from host memory)
  interface PCIeHostBus pcieHostBus;

  // JTAG UART connection to the host
  interface JtagUartAvalon jtagAvalon;

  // The two MACs connecting this board to the FPGA cluster
  interface AvalonMac macA;
  interface AvalonMac macB;

  // Reset request
  (* always_ready, always_enabled *)
  method Bool resetReq;

  // Supplies an 8-bit temperature reading to the module
  (* always_ready, always_enabled *)
  method Action setTemperature(Bit#(8) temp);
endinterface
`endif
// ============================================================================
// Implementation
// ============================================================================
module de5BridgeTop (DE5BridgeTop);
// Ports
// -----
// One explicit port per stream that the rules in this module read or write.
OutPort#(Bit#(128)) toPCIe <- mkOutPort;
InPort#(Bit#(128)) fromPCIe <- mkInPort;
OutPort#(Flit) toLinkA <- mkOutPort;
OutPort#(Flit) toLinkB <- mkOutPort;
InPort#(Flit) fromLink <- mkInPort;
OutPort#(Bit#(8)) toJtag <- mkOutPort;
InPort#(Bit#(8)) fromJtag <- mkInPort;
OutPort#(Flit) toDetector <- mkOutPort;
InPort#(Flit) fromDetector <- mkInPort;

// Create JTAG UART instance
JtagUart uart <- mkJtagUart;

// Connect ports to UART
connectUsing(mkUGShiftQueue1(QueueOptFmax), toJtag.out, uart.jtagIn);
connectUsing(mkUGShiftQueue1(QueueOptFmax), uart.jtagOut, fromJtag.in);

// Create PCIeStream instance
PCIeStream pcie <- mkPCIeStream;

// Create off-board links: link A on the north socket, link B on the south
// socket. Both take pcie.en as their first argument.
// NOTE(review): pcie.en presumably gates the links until the host enables
// the PCIe stream -- confirm against mkBoardLink.
// (A register named enableLinks used to be declared here; nothing in the
// module ever read or wrote it, so it has been removed.)
BoardLink linkA <- mkBoardLink(pcie.en, northSocket[0]);
BoardLink linkB <- mkBoardLink(pcie.en, southSocket[0]);

// Connect ports to off-board links
connectUsing(mkUGQueue, toLinkA.out, linkA.flitIn);
connectUsing(mkUGQueue, toLinkB.out, linkB.flitIn);

// Connect ports to PCIeStream
connectUsing(mkUGQueue, toPCIe.out, pcie.streamIn);
connectDirect(pcie.streamOut, fromPCIe.in);
// Create idle detector master
IdleDetectMaster detector <- mkIdleDetectMaster;

// Connect ports to idle detect master
connectUsing(mkUGQueue, toDetector.out, detector.flitIn);
connectUsing(mkUGQueue, detector.flitOut, fromDetector.in);

// (A register named idleDetectedEnabled used to be declared here. It was
// never read or written; the flag actually passed to the detector is
// idleDetectorEnabled, declared further down, so the dead register has
// been removed.)

// Temperature of this board. Written by setTemperature and returned over
// the UART in response to command 4; resets to 128.
Reg#(Bit#(8)) temperature <- mkConfigReg(128);

// Merge off-board input streams
// -----------------------------
// Merge the two inter-board input streams (link A and link B) into one
let mergeOut <- mkFlitMerger(linkA.flitOut, linkB.flitOut);
connectUsing(mkUGQueue, mergeOut, fromLink.in);
// Split off-board output stream
// -----------------------------
// Buffer holding the outgoing flit stream that is to be split
Queue#(Flit) linkOutBuffer <- mkUGQueue;

// Steer the flit at the head of linkOutBuffer onto one of the two links.
// Idle tokens, and flits whose destination board Y coordinate is even, leave
// on link B (south socket); all remaining flits leave on link A (north
// socket). The flit stays in the buffer until the chosen port can take it.
rule split (linkOutBuffer.notEmpty);
  Flit head = linkOutBuffer.dataOut;
  Bool viaLinkB = head.isIdleToken || head.dest.addr.board.y[0] == 0;
  if (viaLinkB && toLinkB.canPut) begin
    toLinkB.put(head);
    linkOutBuffer.deq;
  end else if (!viaLinkB && toLinkA.canPut) begin
    toLinkA.put(head);
    linkOutBuffer.deq;
  end
endrule
// Connect PCIe stream and 10G link
// --------------------------------
// Fields of the most recent 128-bit header word received from the host
Reg#(Bit#(32)) fromPCIeDA <- mkConfigRegU;  // DA: destination address
Reg#(Bit#(32)) fromPCIeNM <- mkConfigRegU;  // NM: number of messages minus one
Reg#(Bit#(8)) fromPCIeFM <- mkConfigRegU;   // FM: flits per message minus one
Reg#(Bit#(32)) fromPCIeKey <- mkConfigRegU; // Routing key
// 0: service the idle detector / read a header; 1: forward payload flits
Reg#(Bit#(1)) toLinkState <- mkConfigReg(0);
// Index of the current message within the request, and of the current flit
// within the message
Reg#(Bit#(32)) messageCount <- mkConfigReg(0);
Reg#(Bit#(8)) flitCount <- mkConfigReg(0);
// Set once a header has been consumed and until its last flit is forwarded,
// so that returning to state 0 mid-request does not re-read a header
Reg#(Bool) hostInjectInProgress <- mkConfigReg(False);

// State 0: flits from the idle detector take priority and are copied into
// linkOutBuffer. Only when the detector has nothing to send do we resume an
// in-progress host request or parse the next header word from the host.
rule toLink0 (toLinkState == 0);
  if (fromDetector.canGet) begin
    if (linkOutBuffer.notFull) begin
      linkOutBuffer.enq(fromDetector.value);
      fromDetector.get;
    end
  end else begin
    if (hostInjectInProgress)
      toLinkState <= 1;
    else if (fromPCIe.canGet) begin
      hostInjectInProgress <= True;
      // Header layout (byte k of the stream sits in bits [8k+7:8k]):
      //   bytes 0-3 DA, bytes 4-7 NM, byte 8 FM, bytes 9-11 padding,
      //   bytes 12-15 routing key
      Bit#(128) data = fromPCIe.value;
      fromPCIeDA <= data[31:0];
      fromPCIeNM <= data[63:32];
      // FM is the single byte directly after NM; bits [95:72] are padding
      fromPCIeFM <= data[71:64];
      fromPCIeKey <= data[127:96];
      toLinkState <= 1;
      fromPCIe.get;
    end
  end
endrule
// State 1: turn payload words from the host into flits and queue them in
// linkOutBuffer, using the header fields latched by toLink0.
rule toLink1 (toLinkState == 1);
  Bool atMsgStart = flitCount == 0;
  Bool lastFlit = flitCount == fromPCIeFM;
  Bool lastMsg = messageCount == fromPCIeNM;

  if (atMsgStart && detector.disableHostMsgs) begin
    // Hold off sending: only done on a message boundary, so a message that
    // has already started is never interrupted
    toLinkState <= 0;
  end else if (fromPCIe.canGet && linkOutBuffer.notFull) begin
    // One-hot mask selecting the destination thread within the mailbox
    Bit#(6) threadId = fromPCIeDA[`LogThreadsPerMailbox-1:0];
    Vector#(64, Bool) threadMask = newVector();
    for (Integer t = 0; t < 64; t = t+1)
      threadMask[t] = threadId == fromInteger(t);

    // Construct flit. The upper address bits select the destination; when
    // that address asks for key-based routing, the routing key replaces the
    // thread mask.
    Flit outFlit;
    outFlit.dest.addr =
      unpack(truncate(fromPCIeDA[31:`LogThreadsPerMailbox]));
    outFlit.dest.threads =
      outFlit.dest.addr.isKey ? zeroExtend(fromPCIeKey) : pack(threadMask);
    outFlit.payload = fromPCIe.value;
    outFlit.notFinalFlit = !lastFlit;
    outFlit.isIdleToken = False;

    linkOutBuffer.enq(outFlit);
    fromPCIe.get;

    // Tell the detector about each message once, on its first flit
    if (atMsgStart) detector.incCount;

    // Advance the flit / message counters
    if (!lastFlit)
      flitCount <= flitCount+1;
    else begin
      flitCount <= 0;
      if (lastMsg) begin
        // Whole request forwarded: go back and wait for the next header
        messageCount <= 0;
        toLinkState <= 0;
        hostInjectInProgress <= False;
      end else
        messageCount <= messageCount+1;
    end
  end
endrule
// Every message sent to the host occupies `MaxFlitsPerMsg words. This
// register holds the number of zero padding words still owed for the
// message that has just finished.
Reg#(Bit#(`LogMaxFlitsPerMsg)) toPCIePadding <- mkReg(0);

// Number of non-final flits of the current host-bound message seen so far
Reg#(Bit#(`LogMaxFlitsPerMsg)) toPCIeFlitCount <- mkReg(0);

// Connect 10G link to PCIe stream and idle detector:
//  * while padding is owed, emit zero words to the host;
//  * otherwise hand idle tokens to the detector and forward the payload of
//    every other flit to the host.
rule fromLinkRule;
  Flit inFlit = fromLink.value;
  if (toPCIePadding != 0) begin
    if (toPCIe.canPut) begin
      toPCIe.put(0);
      toPCIePadding <= toPCIePadding-1;
    end
  end else if (fromLink.canGet) begin
    if (inFlit.isIdleToken) begin
      if (toDetector.canPut) begin
        toDetector.put(inFlit);
        fromLink.get;
      end
    end else if (toPCIe.canPut) begin
      toPCIe.put(inFlit.payload);
      fromLink.get;
      if (inFlit.notFinalFlit)
        toPCIeFlitCount <= toPCIeFlitCount+1;
      else begin
        // Final flit: schedule the padding that fills the message up to
        // its fixed size, and tell the detector the message has been taken
        toPCIePadding <=
          fromInteger (`MaxFlitsPerMsg-1) - toPCIeFlitCount;
        toPCIeFlitCount <= 0;
        detector.decCount;
      end
    end
  end
endrule
// Dimensions of the board mesh (received over the UART), together with
// their product
Reg#(Bit#(`MeshXBits1)) meshXLen <- mkConfigReg(0);
Reg#(Bit#(`MeshYBits1)) meshYLen <- mkConfigReg(0);
Reg#(Bit#(TAdd#(`MeshXBits1, `MeshYBits1))) meshBoards <- mkConfigReg(0);

// Is idle-detection currently enabled
Reg#(Bool) idleDetectorEnabled <- mkConfigReg(False);

// Continuously present the enable flag and mesh dimensions to the
// idle detector
rule enabler (True);
  detector.enabled(idleDetectorEnabled, meshXLen, meshYLen, meshBoards);
endrule
// Simulation only: print a banner once, at simulation time zero
`ifdef SIMULATE
rule displayStartup;
  let now <- $time;
  if (now == 0)
    $display("\nSimulator for bridge board started");
endrule
`endif
// JTAG UART Handler
// -----------------
// Respond to the Query command with a zero byte. The host uses the
// query command to distinguish this bridge board from a worker board,
// which returns non-zero.
// On the 2nd query command, enable the idle detector.
// This is to allow the ids of all boards to be set before
// enabling the idle detector.
//
// The parameter byte of the second query command contains
// the dimensions of the board mesh: Y = byte[7:4], X = byte[3:0].
// UART handler state:
//   0    wait for a command byte
//   1, 2 consume the two bytes that follow a non-temperature command
//   3, 4 send the two response bytes
// (A boardIdWithinBox register used to be declared here; it was never read
// or written anywhere in the module and has been removed.)
Reg#(Bit#(3)) uartState <- mkConfigReg(0);

// Command byte of the request currently being handled
Reg#(Bit#(8)) cmd <- mkConfigRegU;

// State 0: latch the command byte. Command 4 (temperature query) goes
// straight to the response states; any other command is followed by two
// more bytes, which are consumed first.
rule uartReceive0 (fromJtag.canGet && uartState == 0);
  Bit#(8) command = fromJtag.value;
  fromJtag.get;
  cmd <= command;
  if (command == 4) // Temperature query
    uartState <= 3;
  else // Standard query
    uartState <= 1;
endrule
// State 1: consume the first byte after the command. If the stored command
// is non-zero, switch on idle detection and latch the mesh dimensions
// carried by this byte (X in bits 3:0, Y in bits 7:4) and their product.
rule uartReceive1 (uartState == 1 && fromJtag.canGet);
  Bit#(8) param = fromJtag.value;
  Bit#(`MeshXBits1) xLen = truncate(param[3:0]);
  Bit#(`MeshYBits1) yLen = truncate(param[7:4]);
  if (cmd != 0) begin
    idleDetectorEnabled <= True;
    meshXLen <= xLen;
    meshYLen <= yLen;
    meshBoards <= zeroExtend(xLen) * zeroExtend(yLen);
  end
  fromJtag.get;
  uartState <= 2;
endrule
// State 2: consume the second byte after the command (its value is not
// used) and move on to the response
rule uartReceive2 (uartState == 2 && fromJtag.canGet);
  uartState <= 3;
  fromJtag.get;
endrule
// State 3: send the first response byte: 4 for a temperature query
// (command 4), zero for any other command
rule uartRespond0 (uartState == 3 && toJtag.canPut);
  Bit#(8) reply = (cmd == 4) ? 4 : 0;
  toJtag.put(reply);
  uartState <= 4;
endrule
// State 4: send the second response byte: the board temperature for a
// temperature query (command 4), zero otherwise; then wait for the next
// command
rule uartRespond1 (uartState == 4 && toJtag.canPut);
  Bit#(8) reply = 0;
  if (cmd == 4) reply = temperature;
  toJtag.put(reply);
  uartState <= 0;
endrule
`ifndef SIMULATE
// Host-facing interfaces come from the PCIeStream and JTAG UART instances
interface controlBAR = pcie.external.controlBAR;
interface pcieHostBus = pcie.external.hostBus;
interface jtagAvalon = uart.jtagAvalon;

// Cluster-facing MACs come from the two board links
interface macA = linkA.avalonMac;
interface macB = linkB.avalonMac;

// Reset request is passed through from the PCIeStream instance
method Bool resetReq = pcie.external.resetReq;

// Latch the externally supplied temperature reading
method Action setTemperature(Bit#(8) temp);
  temperature <= temp;
endmethod
`endif
endmodule
endpackage