<?xml version="1.0" encoding="us-ascii"?>
<!DOCTYPE rfc PUBLIC '' "http://xml.resource.org/public/rfc/bibxml/rfc2629.dtd"[
<!ENTITY RFC2119 PUBLIC '' "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml">
<!ENTITY RFC2629 PUBLIC '' "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2629.xml">
<!ENTITY RFC3552 PUBLIC '' "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3552.xml">
<!ENTITY RFC3640 PUBLIC '' "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3640.xml">
]>
<?xml-stylesheet type='text/xsl'
href="http://greenbytes.de/tech/webdav/rfc2629xslt/rfc2629.xslt" ?>
<?rfc strict="yes" ?>
<?rfc toc="yes"?>
<!-- generate a ToC -->
<?rfc tocdepth="4"?>
<!-- the number of levels of subsections in ToC. default: 3 -->
<!-- control references -->
<?rfc symrefs="yes"?>
<!-- use symbolic reference tags, i.e., [RFC2119] instead of [1] -->
<?rfc sortrefs="yes" ?>
<!-- sort the reference entries alphabetically -->
<!-- control vertical white space
(using these PIs as follows is recommended by the RFC Editor) -->
<?rfc compact="yes" ?>
<!-- do not start each main section on a new page -->
<?rfc subcompact="no" ?>
<!-- keep one blank line between list items -->
<!-- end of list of popular I-D processing instructions -->
<rfc category="std"
docName="draft-jasinska-ix-bgp-route-server-01"
ipr="trust200902"
obsoletes=""
updates=""
submissionType="IETF"
xml:lang="en">
<!-- category values: std, bcp, info, exp, and historic
ipr values: full3667, noModification3667, noDerivatives3667
you can add the attributes updates="NNNN" and obsoletes="NNNN"
they will automatically be output with "(if approved)" -->
<!-- ***** FRONT MATTER ***** -->
<front>
<title abbrev="IX BGP Route Server">
Internet Exchange Route Server
</title>
<author initials="E" surname="Jasinska" fullname="Elisa Jasinska">
<organization>Limelight Networks</organization>
<address>
<postal>
<street>2220 W 14th St</street>
<city>Tempe</city>
<region>AZ</region>
<code>85281</code>
<country>US</country>
</postal>
<email>[email protected]</email>
</address>
</author>
<author initials="N" surname="Hilliard" fullname="Nick Hilliard">
<organization>INEX</organization>
<address>
<postal>
<street>4027 Kingswood Road</street>
<city>Dublin</city>
<code>24</code>
<country>IE</country>
</postal>
<email>[email protected]</email>
</address>
</author>
<author initials="R" surname="Raszuk" fullname="Robert Raszuk">
<organization>Cisco Systems</organization>
<address>
<postal>
<street>170 West Tasman Drive</street>
<city>San Jose</city>
<region>CA</region>
<code>95134</code>
<country>US</country>
</postal>
<email>[email protected]</email>
</address>
</author>
<author initials="N" surname="Bakker" fullname="Niels Bakker">
<organization>AMS-IX B.V.</organization>
<address>
<postal>
<street>Westeinde 12</street>
<city>Amsterdam</city>
<region>NH</region>
<code>1017 ZN</code>
<country>NL</country>
</postal>
<email>[email protected]</email>
</address>
</author>
<date month="October" year="2010" />
<area>Routing</area>
<workgroup>GROW Working Group</workgroup>
<keyword>I-D</keyword>
<keyword>Internet-Draft</keyword>
<keyword>GROW</keyword>
<abstract>
<t>
The growing popularity of Internet exchange points (IXPs) brings a new
set of requirements to interconnect participating networks. While
bilateral exterior BGP sessions between exchange participants were
previously the most common means of exchanging reachability
information, the overhead associated with dense interconnection has
caused substantial operational scaling problems for Internet exchange
point participants.
</t>
<t>
This document outlines a specification for multilateral
interconnections at IXPs. Multilateral interconnection is a
method of exchanging routing information between three or more BGP
speakers using a single intermediate broker system, referred to as a
route server. Route servers are typically used on shared access media
networks such as Internet exchange points (IXPs), to facilitate simplified
interconnection between multiple Internet routers on such a network.
</t>
</abstract>
</front>
<middle>
<section title="Introduction to Multilateral Interconnection">
<t>
Internet exchange points (IXPs) provide IP data interconnection
facilities for their participants, typically using shared Layer-2
networking media such as Ethernet. The Border Gateway Protocol (BGP)
<xref target="RFC4271" />, an inter-Autonomous System routing
protocol, is commonly used to facilitate exchange of network
reachability information over such media.
</t>
<t>
In the case of bilateral interconnection between two exchange
participant routers, each router must be configured with a BGP session
to the other. At IXPs with many participants who wish to implement
dense interconnection, this requirement can lead both to large router
configurations and high administrative overhead. Given the growth in
the number of participants at many IXPs, it has become operationally
troublesome to implement densely meshed interconnections at these
IXPs.
</t>
<t>
Multilateral interconnection is a method of interconnecting BGP
speaking routers using a third party brokering system, commonly
referred to as a route server and typically managed by the IXP
operator. Each of the multilateral interconnection participants
(usually referred to as route server clients) announces network
reachability information to the route server using exterior BGP, and
the route server in turn forwards this information to each other route
server client connected to it, according to its configuration.
Although a route server uses BGP to exchange reachability information
with each of its clients, it does not forward traffic itself and is
therefore not a router.
</t>
<t>
A route server can be viewed as similar in function to an <xref
target="RFC4456" /> route reflector, except that it operates using
EBGP instead of IBGP. Certain adaptations to <xref target="RFC4271" />
are required to enable an EBGP router to operate as a route server;
these are outlined in <xref target="spec" /> of this document.
Operational considerations to be taken into account in a route server
deployment are the subject of <xref target="ops_considerations" />.
</t>
<t>
The term "route server" is often used in a different context to
describe a BGP node which accepts BGP feeds from
multiple clients for the purposes of operational analysis and
troubleshooting. A system of this form may alternatively be known
as a "route collector" or a "route-views server". This document
uses the term "route server" exclusively to describe multilateral
peering brokerage systems.
</t>
<section title="Specification of Requirements">
<t>
The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
"SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED", "MAY", and
"OPTIONAL" in this document are to be interpreted as described in
<xref target="RFC2119" />.
</t>
</section>
</section>
<section title="Bilateral Interconnection">
<t>
Bilateral interconnection is a method of interconnecting
routers using individual BGP sessions between each participant router
on an IXP in order to exchange reachability information. While
interconnection policies vary from participant to participant, most
IXPs have significant numbers of participants who see value in
interconnecting with as many other exchange participants as possible.
In order for an IXP participant to implement a dense interconnection
policy, the participant must liaise with each of
its intended interconnection partners; whenever a partner agrees to
interconnect, both participants' routers must be configured with
a BGP session to exchange network reachability information. If each
exchange participant interconnects with each other participant, a full
mesh of BGP sessions is needed, as detailed in <xref
target="ixp_interconnection" />.
</t>
<figure title='Full-Mesh Interconnection at an IXP' anchor="ixp_interconnection">
<preamble></preamble>
<artwork align='center'>
___ ___
/ \ / \
..| AS1 |..| AS2 |..
: \___/____\___/ :
: | \ / | :
: | \ / | :
: IXP | \/ | :
: | /\ | :
: | / \ | :
: _|_/____\_|_ :
: / \ / \ :
..| AS3 |..| AS4 |..
\___/ \___/
</artwork>
<postamble></postamble>
</figure>
<t>
<xref target="ixp_interconnection" /> depicts an IXP platform with
four connected routers, administered by four separate exchange
participants, each of them with a locally unique autonomous system number:
AS1, AS2, AS3 and AS4. Each of these four participants wishes to
exchange traffic with all other participants; this is accomplished by
configuring a full mesh of BGP sessions on each router connected to
the exchange, resulting in 6 BGP sessions across the IXP fabric.
</t>
<t>
The number of BGP sessions at an exchange has an upper bound of
n*(n-1)/2, where n is the number of routers at the exchange. As many
exchanges have relatively large numbers of participating networks, the
quadratic scaling requirements of dense interconnection tend to cause
operational and administrative overhead at large IXPs. Consequently,
new participants to an IXP require significant initial resourcing in
order to gain value from their IXP connection, while existing exchange
participants need to commit ongoing resources in order to benefit from
interconnecting with these new participants.
</t>
</section>
<section title="Multilateral Interconnection">
<t>
Multilateral interconnection is implemented using a route server
configured to use BGP to distribute network layer reachability
information (NLRI) among all client routers. The route server
preserves the BGP NEXT_HOP attribute from all received NLRI UPDATE
messages, and passes these messages with unchanged NEXT_HOP to its
route server clients, according to its configured routing policy.
Using this method of exchanging NLRI messages, an IXP participant
router can receive an aggregated list of prefixes from all other route
server clients using a single BGP session to the route server instead
of depending on BGP sessions with each other router at the exchange.
This reduces the overall number of BGP sessions at an Internet
exchange from n*(n-1)/2 to n, where n is the number of routers at the
exchange.
</t>
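<t>
For illustration only (this is not part of the specification), the
following short Python sketch contrasts the full mesh session count
from the previous section with the single session per client required
by a route server; the participant counts are arbitrary examples.
</t>
<figure>
<artwork><![CDATA[
# Illustrative sketch: BGP session counts at an IXP.  A full mesh
# needs n*(n-1)/2 sessions, a single route server needs only n.

def full_mesh_sessions(n):
    return n * (n - 1) // 2

def route_server_sessions(n):
    return n

for n in (4, 10, 100, 400):       # hypothetical participant counts
    print(n, full_mesh_sessions(n), route_server_sessions(n))

# n=4   ->     6 vs   4
# n=10  ->    45 vs  10
# n=100 ->  4950 vs 100
# n=400 -> 79800 vs 400
]]></artwork>
</figure>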
<t>
In practical terms, this allows dense interconnection between IXP
participants with low administrative overhead and significantly
simpler and smaller router configurations. In particular, new IXP
participants benefit from immediate and extensive interconnection,
while existing route server participants receive reachability
information from these new participants without necessarily having to
adapt their configurations.
</t>
<figure title='IXP-based Interconnection with Route Server' anchor="rs_interconnection">
<preamble></preamble>
<artwork align='center'>
___ ___
/ \ / \
..| AS1 |..| AS2 |..
: \___/ \___/ :
: \ / :
: \ / :
: \__/ :
: IXP / \ :
: | RS | :
: \____/ :
: / \ :
: / \ :
: __/ \__ :
: / \ / \ :
..| AS3 |..| AS4 |..
\___/ \___/
</artwork>
<postamble></postamble>
</figure>
<t>
As illustrated in <xref target="rs_interconnection" />, each router on
the IXP fabric requires only a single BGP session to the route server,
from which it can receive reachability information for all other
routers on the IXP which also connect to the route server.
</t>
</section>
<section title="Technical Considerations for Route Server Implementations" anchor="spec">
<section title="Client UPDATE Messages">
<t>
A route server MUST accept all UPDATE messages received
from each of its clients for inclusion in its Adj-RIB-In. These
UPDATE messages MAY be omitted from the route
server's Loc-RIB or Loc-RIBs, due to filters configured for the
purposes of implementing routing policy. The route server SHOULD
perform one or more BGP Decision Processes to select routes for
subsequent advertisement to its clients, taking into account
possible configuration to provide multiple NLRI paths to a
particular client as described in <xref target="phm_multiple_paths"
/> or multiple Loc-RIBs as described in <xref
target="phm_multiple_ribs" />. The route server SHOULD forward
UPDATE messages where appropriate from its Loc-RIB or Loc-RIBs to
its clients.
</t>
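<t>
The following Python fragment is an illustrative sketch of this
processing flow and is not a normative algorithm; the helper names
"policy_permits" and "best_path" are assumptions made purely for the
example.
</t>
<figure>
<artwork><![CDATA[
# Illustrative sketch: UPDATE messages are accepted into the
# Adj-RIB-In, per-client filtering policy is applied, and a Decision
# Process selects the routes advertised to each client.

class RouteServer:
    def __init__(self):
        self.adj_rib_in = {}     # client -> {prefix: path}
        self.loc_rib = {}        # client -> {prefix: selected path}

    def receive_update(self, client, prefix, path):
        # All client UPDATE messages are accepted into the Adj-RIB-In.
        self.adj_rib_in.setdefault(client, {})[prefix] = path

    def recompute(self, clients, policy_permits, best_path):
        # Run a Decision Process over the paths which survive each
        # client's configured filtering policy.
        for client in clients:
            candidates = {}
            for peer, rib in self.adj_rib_in.items():
                if peer == client:
                    continue
                for prefix, path in rib.items():
                    if policy_permits(peer, client, prefix, path):
                        candidates.setdefault(prefix, []).append(path)
            self.loc_rib[client] = {p: best_path(paths)
                                    for p, paths in candidates.items()}
]]></artwork>
</figure>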
</section>
<section title="Attribute Transparency">
<t>
As a route server primarily performs a brokering service,
modification of attributes could cause route server clients to alter
their BGP best-path selection process for received prefix
reachability information, thereby changing the intended routing
policies of exchange participants. Therefore, contrary to what is
specified in section 5. of <xref target="RFC4271" />, route servers
SHOULD NOT update well-known BGP attributes received from route
server clients before redistributing them to their other route server
clients. Optional recognized and unrecognized BGP attributes,
whether transitive or non-transitive, SHOULD NOT be updated by
the route server and SHOULD be passed on to other route server
clients.
</t>
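<t>
As an illustration of this requirement (and of the contrast with a
conventional EBGP speaker), the following Python sketch shows a path
being redistributed without attribute rewriting; the attribute names
mirror those discussed in the subsections below, and the function
names are examples only.
</t>
<figure>
<artwork><![CDATA[
# Illustrative sketch: attribute transparency on a route server.

def route_server_redistribute(received_path):
    # NEXT_HOP, AS_PATH, MULTI_EXIT_DISC and the Communities
    # attributes are passed on exactly as received; in particular the
    # route server's own AS number is not prepended to the AS_PATH.
    return dict(received_path)          # verbatim copy, no rewriting

def conventional_ebgp_readvertise(received_path, own_asn, own_address):
    # For contrast: a conventional EBGP router prepends its own AS
    # number and normally rewrites NEXT_HOP to one of its addresses.
    out = dict(received_path)
    out["AS_PATH"] = [own_asn] + list(received_path["AS_PATH"])
    out["NEXT_HOP"] = own_address
    return out
]]></artwork>
</figure>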
<section title="NEXT_HOP Attribute">
<t>
The NEXT_HOP, a well-known mandatory BGP attribute,
defines the IP address of the router
used as the next hop to the destinations listed in the Network
Layer Reachability Information field of the UPDATE message. As the
route server does not participate in the actual routing of
traffic, the NEXT_HOP attribute MUST be passed unmodified to the
route server clients, similar to the "third party" next hop
feature described in section 5.1.3. of <xref target="RFC4271" />.
</t>
</section>
<section title="AS_PATH Attribute" anchor="as_path_attr">
<t>
AS_PATH is a well-known mandatory attribute which identifies the
autonomous systems through which routing information carried in
the UPDATE message has passed.
</t>
<t>
As a route server does not participate in the process of
forwarding data between client routers, and because modification
of the AS_PATH attribute could affect route server client
best-path calculations, the route server SHOULD NOT prepend
its own AS number to the AS_PATH segment nor modify the AS_PATH
segment in any other way.
</t>
</section>
<section title="MULTI_EXIT_DISC Attribute">
<t>
MULTI_EXIT_DISC is an optional non-transitive attribute intended
to be used on external (inter-AS) links to discriminate among
multiple exit or entry points to the same neighboring AS. If
applied to an NLRI UPDATE sent to a route server, the attribute
(contrary to section 5.1.4 of <xref target="RFC4271" />) SHOULD
be propagated to other route server clients and the route server
SHOULD NOT modify the value of this attribute.
</t>
</section>
<section title="Communities Attributes">
<t>
The BGP COMMUNITIES (<xref target="RFC1997" />) and Extended
Communities (<xref target="RFC4360" />) attributes are
intended for labeling information
carried in BGP UPDATE messages. Transitive as well as non-transitive
Communities attributes applied to an NLRI UPDATE sent
to a route server SHOULD NOT be modified, processed or
removed. However, if such an attribute is intended for processing
by the route server itself, it MAY be modified or removed.
</t>
</section>
</section>
<section title="Per-Client Prefix Filtering" anchor="prefix_filtering">
<section title="Prefix Hiding on a Route Server" anchor="prefix_hiding">
<t>
While IXP participants often use route servers with the intention
of interconnecting with as many other route server participants as
possible, there are several circumstances where control of prefix
distribution on a per-client basis is important for ensuring that
desired interconnection policies are met.
</t>
<figure title='Filtered Interconnection at an IXP' anchor="ixp_filtered_interconnection">
<preamble></preamble>
<artwork align='center'>
___ ___
/ \ / \
..| AS1 |..| AS2 |..
: \___/ \___/ :
: \ / | :
: \ / | :
: IXP \/ | :
: /\ | :
: / \ | :
: ___/____\_|_ :
: / \ / \ :
..| AS3 |..| AS4 |..
\___/ \___/
</artwork>
<postamble></postamble>
</figure>
<t>
Using the example in <xref target="ixp_filtered_interconnection"
/>, AS1 does not directly exchange prefix information with either
AS2 or AS3 at the IXP, but only interconnects with AS4.
</t>
<t>
In the traditional bilateral interconnection model, prefix
filtering to a third party exchange participant is accomplished
either by not engaging in a bilateral interconnection with that
participant or else by implementing outbound prefix filtering on
the BGP session towards that participant. However, in a
multilateral interconnection environment, only the route server
can perform outbound prefix filtering in the direction of the
route server client; route server clients depend on the
route server to perform their filtering for them.
</t>
<t>
If the same prefix is sent to a route server from multiple route
server clients with different BGP attributes, and traditional
best-path route selection is performed on that list of prefixes,
then the route server will select a single best-path prefix for
propagation to all connected clients. If, however, the route
server has been configured to filter the calculated best-path
prefix from reaching a particular route server client, then that
client will receive no reachability information for that prefix
from the route server, despite the fact that the route server has
received alternative reachability information for that prefix from
other route server clients. This phenomenon is referred to as
"prefix hiding".
</t>
<t>
For example, in <xref target="ixp_filtered_interconnection" />, if
the same prefix were sent to the route server via AS2 and AS4, and
the route via AS2 was preferred according to BGP's traditional
best-path selection, but AS2 was filtered by AS1, then AS1 would
never receive this prefix, even though the route server had
previously received a valid alternative path via AS4. This happens
because the best-path selection is performed only once on the
route server for all clients.
</t>
<t>
It should be noted that prefix hiding will only occur on route servers
which employ per-client prefix filtering; if an IXP operator
deploys a route server without prefix filtering, then prefix
hiding does not occur, as all paths are considered equally valid
from the point of view of the route server.
</t>
<t>
There are several techniques which may be employed to prevent the
prefix hiding problem from occurring. Route server implementations
SHOULD implement at least one method to prevent prefix hiding.
</t>
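<t>
The following Python fragment illustrates the prefix hiding problem
using the filtered interconnection example above; it is not part of
this specification, and all prefixes, preference values and filter
settings are invented for the example.
</t>
<figure>
<artwork><![CDATA[
# Illustrative sketch: AS1 filters routes from AS2, and AS2's path
# wins the single, shared best-path selection, so AS1 receives no
# path at all even though AS4 offers a valid alternative.

paths = {"192.0.2.0/24": [{"from": "AS2", "pref": 200},
                          {"from": "AS4", "pref": 100}]}

def shared_best_path(candidates):
    # One Decision Process for all clients (no per-client Loc-RIB).
    return max(candidates, key=lambda p: p["pref"])

def export_to(client, best, filtered):
    # Per-client outbound filtering happens after best-path selection,
    # so a filtered best path leaves the client with nothing.
    if best["from"] in filtered.get(client, set()):
        return None                     # the prefix is hidden
    return best

best = shared_best_path(paths["192.0.2.0/24"])
print(export_to("AS1", best, {"AS1": {"AS2"}}))     # -> None
]]></artwork>
</figure>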
</section>
<section title="Mitigation Techniques" anchor="prefix_hiding_mitigation">
<section title="Multiple Route Server RIBs" anchor="phm_multiple_ribs">
<t>
The most portable means of preventing the route server prefix
hiding problem is by using a route server BGP implementation
which performs the per-client best-path calculation for each set
of prefixes which results after the route server's client
filtering policies have been taken into consideration. This can
be implemented by using per-client Loc-RIBs, with prefix
filtering implemented between the Adj-RIB-In and the per-client
Loc-RIB. Implementations MAY optimize this by
maintaining prefixes not subject to filtering policies in a
global Loc-RIB, with per-client Loc-RIBs stored as deltas.
</t>
<t>
This problem mitigation technique is highly portable, as it
makes no assumptions about the feature capabilities of the route
server clients.
</t>
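<t>
A minimal Python sketch of the optimization mentioned above follows;
it assumes a particular data layout (a global Loc-RIB plus per-client
deltas) purely for illustration.
</t>
<figure>
<artwork><![CDATA[
# Illustrative sketch: prefixes not subject to any filtering policy
# live in one global Loc-RIB, and each per-client Loc-RIB is stored
# only as a delta on top of it.

class DeltaRibs:
    def __init__(self):
        self.global_rib = {}       # prefix -> best path (unfiltered case)
        self.client_delta = {}     # client -> {prefix: path or None}

    def lookup(self, client, prefix):
        # A per-client delta overrides the global Loc-RIB; a stored
        # None means the prefix is withheld from this client.
        delta = self.client_delta.get(client, {})
        if prefix in delta:
            return delta[prefix]
        return self.global_rib.get(prefix)
]]></artwork>
</figure>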
</section>
<section title="Advertising Multiple Paths" anchor="phm_multiple_paths">
<t>
The prefix distribution model described above assumes standard
BGP session encoding where the route server sends a single path
to its client for any given prefix. This path is selected using
the BGP path selection decision process described in <xref
target="RFC4271" />. If, however, it were possible for the route
server to send more than a single path to a route server client,
then route server
clients would no longer depend on receiving a single best path to a
particular prefix; consequently, the prefix hiding problem
described in <xref target="prefix_hiding" /> would disappear.
</t>
<t>
We present two methods which describe how such
increased path diversity could be implemented.
</t>
<section title="Diverse BGP Path Approach" anchor="phm_diverse_bgp">
<t>
The Diverse BGP Path proposal as defined in <xref
target="I-D.ietf-grow-diverse-bgp-path-dist"></xref> is a
simple way to distribute multiple prefix paths from a route
server to a route server client by using a separate BGP
session from the route server to a client
for each different path.
</t>
<t>
The number of paths which may be distributed to a client is
constrained by the number of BGP sessions which the server and
the client are willing to establish with each other. The
distributed paths may be established from the global BGP
Loc-RIB on the route server in addition to any per-client
Loc-RIB. As there may be more potential paths to a given
prefix than configured BGP sessions, this method is not
guaranteed to eliminate the prefix hiding problem in all
situations. Furthermore, this method may significantly
increase the number of BGP sessions handled by the route
server, which may negatively impact its performance.
</t>
</section>
<section title="BGP ADD-PATH Approach">
<t>
The <xref target="I-D.ietf-idr-add-paths"></xref> Internet
draft proposes a different approach to multiple path
propagation, by allowing a BGP speaker to forward multiple
paths for the same prefix on a single BGP session. As <xref
target="RFC4271" /> specifies that a BGP listener must
implement an implicit withdraw when it receives an UPDATE
message for a prefix which already exists in its Adj-RIB-In,
this approach requires explicit support for the feature both
on the route server and on its clients.
</t>
<t>
If the ADD-PATH capability is negotiated bidirectionally
between the route server and a route server client, and the
route server client propagates multiple paths for the same
prefix to the route server, then this could potentially cause
the propagation of inactive, invalid or suboptimal paths to
the route server, thereby causing loss of reachability to
other route server clients. For this reason, ADD-PATH
implementations on a route server SHOULD enforce send-only
mode with the route server clients, which would result in
negotiating receive-only mode from the client to the route
server.
</t>
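<t>
The sketch below illustrates the send-only recommendation in Python;
it models only the direction negotiation logic, not the on-the-wire
capability encoding, and the mode names are local to the example.
</t>
<figure>
<artwork><![CDATA[
# Illustrative sketch: multiple paths flow in a given direction only
# if one side offers "send" and the other offers "receive", so a
# send-only route server forces receive-only operation on the client.

SEND, RECEIVE = "send", "receive"

def negotiated_directions(rs_offer, client_offer):
    return {
        "rs_to_client": SEND in rs_offer and RECEIVE in client_offer,
        "client_to_rs": SEND in client_offer and RECEIVE in rs_offer,
    }

# Route server enforces send-only; the client may offer both modes.
print(negotiated_directions({SEND}, {SEND, RECEIVE}))
# -> {'rs_to_client': True, 'client_to_rs': False}
]]></artwork>
</figure>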
</section>
</section>
</section>
</section>
</section>
<section title="Operational Considerations for Route Server Installations" anchor="ops_considerations">
<section title="Route Server Scaling" anchor="ops_scaling">
<t>
While deployment of multiple Loc-RIBs on the route server presents a
simple way to avoid the prefix hiding problem noted in <xref
target="prefix_hiding" />, this approach requires significantly more
computing resources on the route server than where a single Loc-RIB
is deployed for all clients. As the <xref target="RFC4271" />
Decision Process must be applied to all Loc-RIBs deployed on the
route server, both CPU and memory requirements on the host computer
scale approximately according to O(P * N), where P is the total
number of unique prefixes received by the route server and N is the
number of route server clients which require a unique Loc-RIB. As
this is a super-linear scaling relationship, large route servers
may derive benefit from deploying per-client
Loc-RIBs only where they are required.
</t>
<t>
Regardless of any Loc-RIB optimization implemented, the route
server's control plane bandwidth requirements will scale
according to O(P * N), where P is the total number of
unique prefixes received by the route server and N is the total
number of route server clients. In the case where P_avg (the arithmetic mean
number of unique prefixes received per route server client) remains
roughly constant even as the number of connected clients increases,
this relationship can be rewritten as O((P_avg * N) * N) or O(N^2).
This quadratic upper bound on the network traffic requirements
indicates that the route server model will not scale to
arbitrarily large sizes.
</t>
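<t>
A back-of-the-envelope calculation, sketched in Python below, may help
to illustrate these scaling relationships; all prefix and client
counts are hypothetical and chosen only to show the growth.
</t>
<figure>
<artwork><![CDATA[
# Illustrative sketch of the O(P * N) growth discussed above.

p_avg = 1000     # hypothetical mean unique prefixes per client
for n in (50, 100, 200, 400):          # number of route server clients
    p_total = p_avg * n                # total unique prefixes (roughly)
    rib_entries = p_total * n          # one Loc-RIB per client: O(P * N)
    updates_sent = p_total * n         # control plane traffic: O(N^2)
    print(n, p_total, rib_entries, updates_sent)

# With n=400 clients this already implies 400,000 prefixes and
# 160,000,000 per-client RIB entries, which is why per-client
# Loc-RIBs should only be created where actually required.
]]></artwork>
</figure>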
<section title="Tackling Scaling Issues">
<t>
The network traffic scaling issue presents significant
difficulties with no clear solution: ultimately, each client
must receive an UPDATE for each unique prefix received by the
route server. However, there are several potential methods for
dealing with the CPU and memory resource requirements of route
servers.
</t>
<section title="View Merging and Decomposition">
<t>
View merging and decomposition, outlined in <xref
target="RS-ARCH" />, is a method of optimizing
memory and CPU requirements where multiple route server clients
are subject to exactly the same routing policies. In this
situation, the multiple Loc-RIB views required by each client are
merged into a single view.
</t>
<t>
A variation of this approach may be implemented on
route servers by ensuring that separate Loc-RIBs are only
configured for route server clients with unique export peering
policies.
</t>
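<t>
A minimal Python sketch of this variation follows; the representation
of an export policy as a single comparable value is an assumption made
for the example.
</t>
<figure>
<artwork><![CDATA[
# Illustrative sketch: clients with identical export policies can
# share one merged Loc-RIB view, so a separate view is only needed
# per distinct policy rather than per client.

def merge_views(client_policies):
    # client_policies: client -> hashable description of its policy
    views = {}
    for client, policy in client_policies.items():
        views.setdefault(policy, []).append(client)
    return views

print(merge_views({"AS1": "open", "AS2": "open",
                   "AS3": ("block", "AS1"), "AS4": "open"}))
# Three of the four clients can share a single merged view.
]]></artwork>
</figure>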
</section>
<section title="Destination Splitting">
<t>
Destination splitting, also outlined in <xref
target="RS-ARCH" />, is a method for route server
clients to connect to multiple route servers and to send
non-overlapping sets of prefixes to each route server. As
each route server computes the best path for its own set of
prefixes, the quadratic scaling requirement operates on
multiple smaller sets of prefixes. This reduces the overall
computational and memory requirements for managing multiple
Loc-RIBs and performing the best-path calculation on each. In
order for this method to perform well, destination splitting
would require significant co-ordination between the route
server operator and each route server client. In practice,
such levels of co-ordination are unlikely to work
successfully, thereby diminishing the value of this approach.
</t>
</section>
<section title="NEXT_HOP Resolution">
<t>
As route servers are usually deployed at IXPs which use flat
layer 2 networks, recursive resolution of the NEXT_HOP
attribute is generally not required, and can be replaced by a
simple check to ensure that the NEXT_HOP value for each prefix
is a network address on the IXP LAN's IP address range.
</t>
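<t>
For illustration, the simplified check can be sketched in a few lines
of Python; the IXP LAN prefix used below is a documentation prefix
chosen purely for the example.
</t>
<figure>
<artwork><![CDATA[
# Illustrative sketch: instead of recursive next hop resolution,
# verify that NEXT_HOP falls inside the IXP peering LAN.

import ipaddress

IXP_LAN = ipaddress.ip_network("192.0.2.0/24")   # example LAN prefix

def next_hop_valid(next_hop):
    return ipaddress.ip_address(next_hop) in IXP_LAN

print(next_hop_valid("192.0.2.33"))    # True:  on the peering LAN
print(next_hop_valid("203.0.113.1"))   # False: not on the peering LAN
]]></artwork>
</figure>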
</section>
</section>
</section>
<section title="NLRI Leakage Mitigation" anchor="ops_leakage_mitigation">
<t>
NLRI leakage occurs when a BGP client unintentionally distributes
NLRI UPDATE messages to one or more neighboring BGP routers. NLRI
leakage of this form to a route server can cause connectivity
problems at an IXP if each route server client is configured to
accept all prefix UPDATE messages from the route server. Due to the
potential for collateral damage caused by NLRI leakage, it is
therefore RECOMMENDED that route server
operators deploy NLRI leakage mitigation measures in order to
prevent unintentional prefix announcements or else to limit the scale
of any such leak. Although not foolproof, per-client inbound prefix
limits can restrict the damage caused by prefix leakage in many
cases. Per-client inbound prefix filtering on the route server is a
more deterministic and usually more reliable means of preventing
prefix leakage, but requires more administrative resources to
maintain properly.
</t>
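<t>
The following Python fragment sketches a per-client inbound prefix
limit of the kind described above; the limit values and the action
taken when a limit is exceeded are assumptions for the example.
</t>
<figure>
<artwork><![CDATA[
# Illustrative sketch: refuse further prefixes from a client (or, by
# local policy, tear down its session) once its configured limit is
# exceeded, bounding the damage caused by a leak.

DEFAULT_LIMIT = 1000
PREFIX_LIMITS = {"AS3": 500}            # hypothetical per-client limits

def accept_prefix(client, adj_rib_in):
    limit = PREFIX_LIMITS.get(client, DEFAULT_LIMIT)
    return len(adj_rib_in.get(client, {})) < limit
]]></artwork>
</figure>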
</section>
<section title="Route Server Redundancy" anchor="ops_use_two_of_em">
<t>
As the purpose of an IXP route server implementation is to provide a
reliable reachability brokerage service, it is RECOMMENDED that
exchange operators who implement route server systems provision
multiple route servers on each shared Layer-2 domain. There is no
requirement to use the same BGP implementation or operating system
for each route server
on the IXP fabric; however, it is RECOMMENDED that where an
operator provisions more than a single server on the same shared
Layer-2 domain, each route server implementation be configured
equivalently and in such a manner that the path reachability
information from each system is identical.
</t>
</section>
<section title="AS_PATH Consistency Check">
<t>
According to <xref target="RFC4271" />, a BGP speaker which
advertises a route to another external BGP speaker must prepend
its own AS number as the last element of the AS_PATH sequence.
Therefore, the leftmost autonomous system number in an AS_PATH
attribute should normally be equal to the autonomous system number
of the BGP speaker that sent the UPDATE message.
</t>
<t>
Section 6.3 of <xref target="RFC4271" /> suggests that a BGP
speaker MAY check the AS_PATH attribute of each UPDATE message to
ensure that the leftmost AS in the AS_PATH is equal to the
autonomous system number of the peer which sent the message.
</t>
<t>
Route servers do not modify the AS_PATH attribute (as described in
<xref target="as_path_attr" />), since they do not participate in
the traffic exchange. Therefore, a consistency check on the
AS_PATH of an UPDATE received by a route server client will fail
if the route server operator does not prepend the route server ASN
to the AS_PATH. In this situation, it is RECOMMENDED that route
server clients disable the AS_PATH consistency check towards the
route server.
</t>
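<t>
The check and the recommended exception can be sketched in Python as
follows; the AS numbers are examples only.
</t>
<figure>
<artwork><![CDATA[
# Illustrative sketch: the first-AS consistency check suggested by
# RFC 4271, relaxed on sessions towards a route server which does not
# prepend its own ASN.

def first_as_ok(as_path, peer_asn, peer_is_route_server=False):
    if peer_is_route_server:
        return True            # check disabled towards the route server
    return bool(as_path) and as_path[0] == peer_asn

RS_ASN = 65000
path_via_rs = [64501, 64496]   # originated by AS64496, sent via AS64501

print(first_as_ok(path_via_rs, RS_ASN))                             # False
print(first_as_ok(path_via_rs, RS_ASN, peer_is_route_server=True))  # True
]]></artwork>
</figure>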
</section>
<section title="Implementing Routing Policies">
<t>
Prefix filtering is commonly implemented on route servers to
provide prefix distribution control mechanisms for route server
clients. There are a few commonly used strategies available.
</t>
<section title="Communities">
<t>
Prefixes sent to the route server are tagged with certain
COMMUNITIES attributes agreed upon beforehand between the operator
and all participants. Based on the values, routes are propagated
to all other participants, a subset of participants, or none.
This allows for one-way filtering policies to be implemented
on the route server; if a participant chooses not to exchange routes
with a certain other participant, it must instruct the route
server not to announce its own routes to that participant, and it
must filter any incoming routes from that participant on its own router.
</t>
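<t>
As an illustration, the following Python sketch shows one possible
community convention for controlling propagation; the specific
community values and the default behavior are assumptions of the
example and are not defined by this document.
</t>
<figure>
<artwork><![CDATA[
# Illustrative sketch: (0, 0) means announce to nobody unless
# re-enabled per peer with (rs_asn, peer_asn); (0, peer_asn) blocks
# announcement to a single peer.  This convention is an example only.

RS_ASN = 65000                          # hypothetical route server ASN

def announce_to(peer_asn, communities):
    if (0, 0) in communities:           # announce to nobody ...
        return (RS_ASN, peer_asn) in communities   # ... unless re-enabled
    return (0, peer_asn) not in communities        # default: announce

print(announce_to(64500, set()))                       # True:  announce to all
print(announce_to(64500, {(0, 64500)}))                # False: blocked for AS64500
print(announce_to(64500, {(0, 0), (RS_ASN, 64500)}))   # True:  only AS64500 enabled
print(announce_to(64501, {(0, 0), (RS_ASN, 64500)}))   # False: everybody else blocked
]]></artwork>
</figure>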
</section>
<section title="Internet Routing Registry">
<t>
Filters configured on the route server can be constructed by querying
an Internet Routing Registry database for RPSL <xref target="RFC2622" />
objects placed there by participating operators. Import and export
statements referencing the route server's ASN in a participant's aut-num
object define that participant's desired policy, from which the
configured filters are derived.
</t>
</section>
</section>
</section>
<section title="Security Considerations">
<t>
On route server installations which do not employ prefix-hiding
mitigation techniques, the prefix hiding problem outlined in
<xref target="prefix_hiding" /> can be used in certain circumstances
to proactively block third party prefix announcements from other route
server clients.
</t>
</section>
<section title="IANA Considerations">
<t>
The new set of mechanisms for route servers does not require any new
allocations from IANA.
</t>
</section>
<section title="Acknowledgments">
<t>
The authors would like to thank Chris Hall, Ryan Bickhart and Steven
Bakker for their valuable input.
</t>
<t>
In addition, the authors would like to acknowledge the developers of
BIRD, OpenBGPD and Quagga, whose open source BGP implementations
include route server capabilities which are compliant with this
document.
</t>
</section>
</middle>
<back>
<references title="Normative References">
<?rfc include="reference.RFC.1997"?> <!-- BGP Communities -->
<?rfc include="reference.RFC.2119"?> <!-- key words -->
<?rfc include="reference.RFC.2622"?> <!-- RPSL -->
<?rfc include="reference.RFC.4271"?> <!-- BGP-4 -->
<?rfc include="reference.RFC.4360"?> <!-- Extended Communities -->
<?rfc include="reference.RFC.4456"?> <!-- Route Reflector IBGP -->
<?rfc include="reference.I-D.ietf-idr-add-paths"?>
<?rfc include="reference.I-D.ietf-grow-diverse-bgp-path-dist"?>
<reference anchor="RS-ARCH"
target="http://www.cs.usc.edu/research/95-603.ps.Z">
<front>
<title>A Route Server Architecture for Inter-Domain Routing</title>
<author initials="R" surname="Govindan" fullname="Ramesh Govindan">
<organization>The University of Southern California</organization>
</author>
<author initials="C" surname="Alaettinoglu" fullname="Cengiz Alaettinoglu">
<organization>The University of Southern California</organization>
</author>
<author initials="K" surname="Varadhan" fullname="Kannan Varadhan">
<organization>The University of Southern California</organization>
</author>
<author initials="D" surname="Estrin" fullname="Deborah Estrin">
<organization>The University of Southern California</organization>
</author>
<date year="1995" />
</front>
</reference>
</references>
</back>
</rfc>