1 /* $OpenBSD: rde_update.c,v 1.175 2025/02/04 18:16:56 denis Exp $ */
2
3 /*
4 * Copyright (c) 2004 Claudio Jeker <claudio@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include <sys/types.h>
19 #include <sys/queue.h>
20 #include <sys/tree.h>
21
22 #include <limits.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <stdio.h>
26
27 #include "bgpd.h"
28 #include "session.h"
29 #include "rde.h"
30 #include "log.h"
31
/*
 * Outcome of running one prefix through the outbound pipeline in
 * up_process_prefix().
 */
enum up_state {
	UP_OK = 0,	/* prefix was added to the Adj-RIB-Out */
	UP_ERR_LIMIT,	/* the peer outbound prefix limit was reached */
	UP_FILTERED,	/* prefix was filtered out */
	UP_EXCLUDED,	/* prefix was excluded by up_test_update() */
};
38
39 static struct community comm_no_advertise = {
40 .flags = COMMUNITY_TYPE_BASIC,
41 .data1 = COMMUNITY_WELLKNOWN,
42 .data2 = COMMUNITY_NO_ADVERTISE
43 };
44 static struct community comm_no_export = {
45 .flags = COMMUNITY_TYPE_BASIC,
46 .data1 = COMMUNITY_WELLKNOWN,
47 .data2 = COMMUNITY_NO_EXPORT
48 };
49 static struct community comm_no_expsubconfed = {
50 .flags = COMMUNITY_TYPE_BASIC,
51 .data1 = COMMUNITY_WELLKNOWN,
52 .data2 = COMMUNITY_NO_EXPSUBCONFED
53 };
54
/* forward declaration, the definition follows up_get_nexthop() */
static void	up_prep_adjout(struct rde_peer *peer,
		    struct filterstate *state, uint8_t aid);
56
57 static int
up_test_update(struct rde_peer * peer,struct prefix * p)58 up_test_update(struct rde_peer *peer, struct prefix *p)
59 {
60 struct rde_aspath *asp;
61 struct rde_community *comm;
62 struct rde_peer *frompeer;
63
64 frompeer = prefix_peer(p);
65 asp = prefix_aspath(p);
66 comm = prefix_communities(p);
67
68 if (asp == NULL || asp->flags & F_ATTR_PARSE_ERR)
69 fatalx("try to send out a botched path");
70 if (asp->flags & (F_ATTR_LOOP | F_ATTR_OTC_LEAK))
71 fatalx("try to send out a looped path");
72
73 if (peer == frompeer)
74 /* Do not send routes back to sender */
75 return (0);
76
77 if (!frompeer->conf.ebgp && !peer->conf.ebgp) {
78 /*
79 * route reflector redistribution rules:
80 * 1. if announce is set -> announce
81 * 2. from non-client, to non-client -> no
82 * 3. from client, to non-client -> yes
83 * 4. from non-client, to client -> yes
84 * 5. from client, to client -> yes
85 */
86 if (frompeer->conf.reflector_client == 0 &&
87 peer->conf.reflector_client == 0 &&
88 (asp->flags & F_PREFIX_ANNOUNCED) == 0)
89 /* Do not redistribute updates to ibgp peers */
90 return (0);
91 }
92
93 /*
94 * With "transparent-as yes" set do not filter based on
95 * well-known communities. Instead pass them on to the client.
96 */
97 if (peer->flags & PEERFLAG_TRANS_AS)
98 return (1);
99
100 /* well-known communities */
101 if (community_match(comm, &comm_no_advertise, NULL))
102 return (0);
103 if (peer->conf.ebgp) {
104 if (community_match(comm, &comm_no_export, NULL))
105 return (0);
106 if (community_match(comm, &comm_no_expsubconfed, NULL))
107 return (0);
108 }
109
110 return (1);
111 }
112
113 /* RFC9234 open policy handling */
114 static int
up_enforce_open_policy(struct rde_peer * peer,struct filterstate * state,uint8_t aid)115 up_enforce_open_policy(struct rde_peer *peer, struct filterstate *state,
116 uint8_t aid)
117 {
118 /* only for IPv4 and IPv6 unicast */
119 if (aid != AID_INET && aid != AID_INET6)
120 return 0;
121
122 /*
123 * do not propagate (consider it filtered) if OTC is present and
124 * local role is peer, customer or rs-client.
125 */
126 if (peer->role == ROLE_PEER || peer->role == ROLE_CUSTOMER ||
127 peer->role == ROLE_RS_CLIENT)
128 if (state->aspath.flags & F_ATTR_OTC)
129 return 1;
130
131 /*
132 * add OTC attribute if not present towards peers, customers and
133 * rs-clients (local roles peer, provider, rs).
134 */
135 if (peer->role == ROLE_PEER || peer->role == ROLE_PROVIDER ||
136 peer->role == ROLE_RS)
137 if ((state->aspath.flags & F_ATTR_OTC) == 0) {
138 uint32_t tmp;
139
140 tmp = htonl(peer->conf.local_as);
141 if (attr_optadd(&state->aspath,
142 ATTR_OPTIONAL|ATTR_TRANSITIVE, ATTR_OTC,
143 &tmp, sizeof(tmp)) == -1)
144 log_peer_warnx(&peer->conf,
145 "failed to add OTC attribute");
146 state->aspath.flags |= F_ATTR_OTC;
147 }
148
149 return 0;
150 }
151
152 /*
153 * Process a single prefix by passing it through the various filter stages
154 * and if not filtered out update the Adj-RIB-Out. Returns:
155 * - UP_OK if prefix was added
156 * - UP_ERR_LIMIT if the peer outbound prefix limit was reached
157 * - UP_FILTERED if prefix was filtered out
158 * - UP_EXCLUDED if prefix was excluded because of up_test_update()
159 */
160 static enum up_state
up_process_prefix(struct rde_peer * peer,struct prefix * new,struct prefix * p)161 up_process_prefix(struct rde_peer *peer, struct prefix *new, struct prefix *p)
162 {
163 struct filterstate state;
164 struct bgpd_addr addr;
165 int excluded = 0;
166
167 /*
168 * up_test_update() needs to run before the output filters
169 * else the well-known communities won't work properly.
170 * The output filters would not be able to add well-known
171 * communities.
172 */
173 if (!up_test_update(peer, new))
174 excluded = 1;
175
176 rde_filterstate_prep(&state, new);
177 pt_getaddr(new->pt, &addr);
178 if (rde_filter(peer->out_rules, peer, prefix_peer(new), &addr,
179 new->pt->prefixlen, &state) == ACTION_DENY) {
180 rde_filterstate_clean(&state);
181 return UP_FILTERED;
182 }
183
184 /* Open Policy Check: acts like an output filter */
185 if (up_enforce_open_policy(peer, &state, new->pt->aid)) {
186 rde_filterstate_clean(&state);
187 return UP_FILTERED;
188 }
189
190 if (excluded) {
191 rde_filterstate_clean(&state);
192 return UP_EXCLUDED;
193 }
194
195 /* from here on we know this is an update */
196 if (p == (void *)-1)
197 p = prefix_adjout_get(peer, new->path_id_tx, new->pt);
198
199 up_prep_adjout(peer, &state, new->pt->aid);
200 prefix_adjout_update(p, peer, &state, new->pt, new->path_id_tx);
201 rde_filterstate_clean(&state);
202
203 /* max prefix checker outbound */
204 if (peer->conf.max_out_prefix &&
205 peer->stats.prefix_out_cnt > peer->conf.max_out_prefix) {
206 log_peer_warnx(&peer->conf,
207 "outbound prefix limit reached (>%u/%u)",
208 peer->stats.prefix_out_cnt, peer->conf.max_out_prefix);
209 rde_update_err(peer, ERR_CEASE,
210 ERR_CEASE_MAX_SENT_PREFIX, NULL);
211 return UP_ERR_LIMIT;
212 }
213
214 return UP_OK;
215 }
216
217 void
up_generate_updates(struct rde_peer * peer,struct rib_entry * re)218 up_generate_updates(struct rde_peer *peer, struct rib_entry *re)
219 {
220 struct prefix *new, *p;
221
222 p = prefix_adjout_first(peer, re->prefix);
223
224 new = prefix_best(re);
225 while (new != NULL) {
226 switch (up_process_prefix(peer, new, p)) {
227 case UP_OK:
228 case UP_ERR_LIMIT:
229 return;
230 case UP_FILTERED:
231 if (peer->flags & PEERFLAG_EVALUATE_ALL) {
232 new = TAILQ_NEXT(new, entry.list.rib);
233 if (new != NULL && prefix_eligible(new))
234 continue;
235 }
236 goto done;
237 case UP_EXCLUDED:
238 goto done;
239 }
240 }
241
242 done:
243 /* withdraw prefix */
244 if (p != NULL)
245 prefix_adjout_withdraw(p);
246 }
247
248 /*
249 * Generate updates for the add-path send case. Depending on the
250 * peer eval settings prefixes are selected and distributed.
251 * This highly depends on the Adj-RIB-Out to handle prefixes with no
252 * changes gracefully. It may be possible to improve the API so that
253 * less churn is needed.
254 */
255 void
up_generate_addpath(struct rde_peer * peer,struct rib_entry * re)256 up_generate_addpath(struct rde_peer *peer, struct rib_entry *re)
257 {
258 struct prefix *head, *new, *p;
259 int maxpaths = 0, extrapaths = 0, extra;
260 int checkmode = 1;
261
262 head = prefix_adjout_first(peer, re->prefix);
263
264 /* mark all paths as stale */
265 for (p = head; p != NULL; p = prefix_adjout_next(peer, p))
266 p->flags |= PREFIX_FLAG_STALE;
267
268 /* update paths */
269 new = prefix_best(re);
270 while (new != NULL) {
271 /* check limits and stop when a limit is reached */
272 if (peer->eval.maxpaths != 0 &&
273 maxpaths >= peer->eval.maxpaths)
274 break;
275 if (peer->eval.extrapaths != 0 &&
276 extrapaths >= peer->eval.extrapaths)
277 break;
278
279 extra = 1;
280 if (checkmode) {
281 switch (peer->eval.mode) {
282 case ADDPATH_EVAL_BEST:
283 if (new->dmetric == PREFIX_DMETRIC_BEST)
284 extra = 0;
285 else
286 checkmode = 0;
287 break;
288 case ADDPATH_EVAL_ECMP:
289 if (new->dmetric == PREFIX_DMETRIC_BEST ||
290 new->dmetric == PREFIX_DMETRIC_ECMP)
291 extra = 0;
292 else
293 checkmode = 0;
294 break;
295 case ADDPATH_EVAL_AS_WIDE:
296 if (new->dmetric == PREFIX_DMETRIC_BEST ||
297 new->dmetric == PREFIX_DMETRIC_ECMP ||
298 new->dmetric == PREFIX_DMETRIC_AS_WIDE)
299 extra = 0;
300 else
301 checkmode = 0;
302 break;
303 case ADDPATH_EVAL_ALL:
304 /* nothing to check */
305 checkmode = 0;
306 break;
307 default:
308 fatalx("unknown add-path eval mode");
309 }
310 }
311
312 switch (up_process_prefix(peer, new, (void *)-1)) {
313 case UP_OK:
314 maxpaths++;
315 extrapaths += extra;
316 break;
317 case UP_FILTERED:
318 case UP_EXCLUDED:
319 break;
320 case UP_ERR_LIMIT:
321 /* just give up */
322 return;
323 }
324
325 /* only allow valid prefixes */
326 new = TAILQ_NEXT(new, entry.list.rib);
327 if (new == NULL || !prefix_eligible(new))
328 break;
329 }
330
331 /* withdraw stale paths */
332 for (p = head; p != NULL; p = prefix_adjout_next(peer, p)) {
333 if (p->flags & PREFIX_FLAG_STALE)
334 prefix_adjout_withdraw(p);
335 }
336 }
337
338 /*
339 * Generate updates for the add-path send all case. Since all prefixes
340 * are distributed just remove old and add new.
341 */
342 void
up_generate_addpath_all(struct rde_peer * peer,struct rib_entry * re,struct prefix * new,struct prefix * old)343 up_generate_addpath_all(struct rde_peer *peer, struct rib_entry *re,
344 struct prefix *new, struct prefix *old)
345 {
346 struct prefix *p, *head = NULL;
347 int all = 0;
348
349 /*
350 * if old and new are NULL then insert all prefixes from best,
351 * clearing old routes in the process
352 */
353 if (old == NULL && new == NULL) {
354 /* mark all paths as stale */
355 head = prefix_adjout_first(peer, re->prefix);
356 for (p = head; p != NULL; p = prefix_adjout_next(peer, p))
357 p->flags |= PREFIX_FLAG_STALE;
358
359 new = prefix_best(re);
360 all = 1;
361 }
362
363 if (new != NULL && !prefix_eligible(new)) {
364 /* only allow valid prefixes */
365 new = NULL;
366 }
367
368 if (old != NULL) {
369 /* withdraw stale paths */
370 p = prefix_adjout_get(peer, old->path_id_tx, old->pt);
371 if (p != NULL)
372 prefix_adjout_withdraw(p);
373 }
374
375 /* add new path (or multiple if all is set) */
376 while (new != NULL) {
377 switch (up_process_prefix(peer, new, (void *)-1)) {
378 case UP_OK:
379 case UP_FILTERED:
380 case UP_EXCLUDED:
381 break;
382 case UP_ERR_LIMIT:
383 /* just give up */
384 return;
385 }
386
387 if (!all)
388 break;
389
390 /* only allow valid prefixes */
391 new = TAILQ_NEXT(new, entry.list.rib);
392 if (new == NULL || !prefix_eligible(new))
393 break;
394 }
395
396 if (all) {
397 /* withdraw stale paths */
398 for (p = head; p != NULL; p = prefix_adjout_next(peer, p)) {
399 if (p->flags & PREFIX_FLAG_STALE)
400 prefix_adjout_withdraw(p);
401 }
402 }
403 }
404
405 /* send a default route to the specified peer */
406 void
up_generate_default(struct rde_peer * peer,uint8_t aid)407 up_generate_default(struct rde_peer *peer, uint8_t aid)
408 {
409 extern struct rde_peer *peerself;
410 struct filterstate state;
411 struct rde_aspath *asp;
412 struct prefix *p;
413 struct pt_entry *pte;
414 struct bgpd_addr addr;
415
416 if (peer->capa.mp[aid] == 0)
417 return;
418
419 rde_filterstate_init(&state);
420 asp = &state.aspath;
421 asp->aspath = aspath_get(NULL, 0);
422 asp->origin = ORIGIN_IGP;
423 rde_filterstate_set_vstate(&state, ROA_NOTFOUND, ASPA_NEVER_KNOWN);
424 /* the other default values are OK, nexthop is once again NULL */
425
426 /*
427 * XXX apply default overrides. Not yet possible, mainly a parse.y
428 * problem.
429 */
430 /* rde_apply_set(asp, peerself, peerself, set, af); */
431
432 memset(&addr, 0, sizeof(addr));
433 addr.aid = aid;
434 p = prefix_adjout_lookup(peer, &addr, 0);
435
436 /* outbound filter as usual */
437 if (rde_filter(peer->out_rules, peer, peerself, &addr, 0, &state) ==
438 ACTION_DENY) {
439 rde_filterstate_clean(&state);
440 return;
441 }
442
443 up_prep_adjout(peer, &state, addr.aid);
444 /* can't use pt_fill here since prefix_adjout_update keeps a ref */
445 pte = pt_get(&addr, 0);
446 if (pte == NULL)
447 pte = pt_add(&addr, 0);
448 prefix_adjout_update(p, peer, &state, pte, 0);
449 rde_filterstate_clean(&state);
450
451 /* max prefix checker outbound */
452 if (peer->conf.max_out_prefix &&
453 peer->stats.prefix_out_cnt > peer->conf.max_out_prefix) {
454 log_peer_warnx(&peer->conf,
455 "outbound prefix limit reached (>%u/%u)",
456 peer->stats.prefix_out_cnt, peer->conf.max_out_prefix);
457 rde_update_err(peer, ERR_CEASE,
458 ERR_CEASE_MAX_SENT_PREFIX, NULL);
459 }
460 }
461
462 static struct bgpd_addr *
up_get_nexthop(struct rde_peer * peer,struct filterstate * state,uint8_t aid)463 up_get_nexthop(struct rde_peer *peer, struct filterstate *state, uint8_t aid)
464 {
465 struct bgpd_addr *peer_local = NULL;
466
467 switch (aid) {
468 case AID_INET:
469 case AID_VPN_IPv4:
470 if (peer_has_ext_nexthop(peer, aid) &&
471 peer->remote_addr.aid == AID_INET6)
472 peer_local = &peer->local_v6_addr;
473 else if (peer->local_v4_addr.aid == AID_INET)
474 peer_local = &peer->local_v4_addr;
475 break;
476 case AID_INET6:
477 case AID_VPN_IPv6:
478 if (peer->local_v6_addr.aid == AID_INET6)
479 peer_local = &peer->local_v6_addr;
480 break;
481 case AID_EVPN:
482 if (peer->local_v4_addr.aid == AID_INET)
483 peer_local = &peer->local_v4_addr;
484 else if (peer->local_v6_addr.aid == AID_INET6)
485 peer_local = &peer->local_v6_addr;
486 break;
487 case AID_FLOWSPECv4:
488 case AID_FLOWSPECv6:
489 /* flowspec has no nexthop */
490 return (NULL);
491 default:
492 fatalx("%s, bad AID %s", __func__, aid2str(aid));
493 }
494
495 if (state->nhflags & NEXTHOP_SELF) {
496 /*
497 * Forcing the nexthop to self is always possible
498 * and has precedence over other flags.
499 */
500 return (peer_local);
501 } else if (!peer->conf.ebgp) {
502 /*
503 * in the ibgp case the nexthop is normally not
504 * modified unless it points at the peer itself.
505 */
506 if (state->nexthop == NULL) {
507 /* announced networks without explicit nexthop set */
508 return (peer_local);
509 }
510 /*
511 * per RFC: if remote peer address is equal to the nexthop set
512 * the nexthop to our local address. This reduces the risk of
513 * routing loops. This overrides NEXTHOP_NOMODIFY.
514 */
515 if (memcmp(&state->nexthop->exit_nexthop,
516 &peer->remote_addr, sizeof(peer->remote_addr)) == 0) {
517 return (peer_local);
518 }
519 return (&state->nexthop->exit_nexthop);
520 } else if (peer->conf.distance == 1) {
521 /*
522 * In the ebgp directly connected case never send
523 * out a nexthop that is outside of the connected
524 * network of the peer. No matter what flags are
525 * set. This follows section 5.1.3 of RFC 4271.
526 * So just check if the nexthop is in the same net
527 * is enough here.
528 */
529 if (state->nexthop != NULL &&
530 state->nexthop->flags & NEXTHOP_CONNECTED &&
531 prefix_compare(&peer->remote_addr,
532 &state->nexthop->nexthop_net,
533 state->nexthop->nexthop_netlen) == 0) {
534 /* nexthop and peer are in the same net */
535 return (&state->nexthop->exit_nexthop);
536 }
537 return (peer_local);
538 } else {
539 /*
540 * For ebgp multihop make it possible to overrule
541 * the sent nexthop by setting NEXTHOP_NOMODIFY.
542 * Similar to the ibgp case there is no same net check
543 * needed but still ensure that the nexthop is not
544 * pointing to the peer itself.
545 */
546 if (state->nhflags & NEXTHOP_NOMODIFY &&
547 state->nexthop != NULL &&
548 memcmp(&state->nexthop->exit_nexthop,
549 &peer->remote_addr, sizeof(peer->remote_addr)) != 0) {
550 /* no modify flag set and nexthop not peer addr */
551 return (&state->nexthop->exit_nexthop);
552 }
553 return (peer_local);
554 }
555 }
556
557 static void
up_prep_adjout(struct rde_peer * peer,struct filterstate * state,uint8_t aid)558 up_prep_adjout(struct rde_peer *peer, struct filterstate *state, uint8_t aid)
559 {
560 struct bgpd_addr *nexthop;
561 struct nexthop *nh = NULL;
562 u_char *np;
563 uint16_t nl;
564
565 /* prepend local AS number for eBGP sessions. */
566 if (peer->conf.ebgp && (peer->flags & PEERFLAG_TRANS_AS) == 0) {
567 uint32_t prep_as = peer->conf.local_as;
568 np = aspath_prepend(state->aspath.aspath, prep_as, 1, &nl);
569 aspath_put(state->aspath.aspath);
570 state->aspath.aspath = aspath_get(np, nl);
571 free(np);
572 }
573
574 /* update nexthop */
575 nexthop = up_get_nexthop(peer, state, aid);
576 if (nexthop != NULL)
577 nh = nexthop_get(nexthop);
578 nexthop_unref(state->nexthop);
579 state->nexthop = nh;
580 state->nhflags = 0;
581 }
582
583
584 static int
up_generate_attr(struct ibuf * buf,struct rde_peer * peer,struct rde_aspath * asp,struct rde_community * comm,struct nexthop * nh,uint8_t aid)585 up_generate_attr(struct ibuf *buf, struct rde_peer *peer,
586 struct rde_aspath *asp, struct rde_community *comm, struct nexthop *nh,
587 uint8_t aid)
588 {
589 struct attr *oa = NULL, *newaggr = NULL;
590 u_char *pdata;
591 uint32_t tmp32;
592 int flags, neednewpath = 0, rv;
593 uint16_t plen;
594 uint8_t oalen = 0, type;
595
596 if (asp->others_len > 0)
597 oa = asp->others[oalen++];
598
599 /* dump attributes in ascending order */
600 for (type = ATTR_ORIGIN; type < 255; type++) {
601 while (oa && oa->type < type) {
602 if (oalen < asp->others_len)
603 oa = asp->others[oalen++];
604 else
605 oa = NULL;
606 }
607
608 switch (type) {
609 /*
610 * Attributes stored in rde_aspath
611 */
612 case ATTR_ORIGIN:
613 if (attr_writebuf(buf, ATTR_WELL_KNOWN,
614 ATTR_ORIGIN, &asp->origin, 1) == -1)
615 return -1;
616 break;
617 case ATTR_ASPATH:
618 plen = aspath_length(asp->aspath);
619 pdata = aspath_dump(asp->aspath);
620
621 if (!peer_has_as4byte(peer))
622 pdata = aspath_deflate(pdata, &plen,
623 &neednewpath);
624 rv = attr_writebuf(buf, ATTR_WELL_KNOWN,
625 ATTR_ASPATH, pdata, plen);
626 if (!peer_has_as4byte(peer))
627 free(pdata);
628
629 if (rv == -1)
630 return -1;
631 break;
632 case ATTR_NEXTHOP:
633 switch (aid) {
634 case AID_INET:
635 if (nh == NULL)
636 return -1;
637 if (nh->exit_nexthop.aid != AID_INET) {
638 if (peer_has_ext_nexthop(peer, aid))
639 break;
640 return -1;
641 }
642 if (attr_writebuf(buf, ATTR_WELL_KNOWN,
643 ATTR_NEXTHOP, &nh->exit_nexthop.v4,
644 sizeof(nh->exit_nexthop.v4)) == -1)
645 return -1;
646 break;
647 default:
648 break;
649 }
650 break;
651 case ATTR_MED:
652 /*
653 * The old MED from other peers MUST not be announced
654 * to others unless the MED is originating from us or
655 * the peer is an IBGP one. Only exception are routers
656 * with "transparent-as yes" set.
657 */
658 if (asp->flags & F_ATTR_MED && (!peer->conf.ebgp ||
659 asp->flags & F_ATTR_MED_ANNOUNCE ||
660 peer->flags & PEERFLAG_TRANS_AS)) {
661 tmp32 = htonl(asp->med);
662 if (attr_writebuf(buf, ATTR_OPTIONAL,
663 ATTR_MED, &tmp32, 4) == -1)
664 return -1;
665 }
666 break;
667 case ATTR_LOCALPREF:
668 if (!peer->conf.ebgp) {
669 /* local preference, only valid for ibgp */
670 tmp32 = htonl(asp->lpref);
671 if (attr_writebuf(buf, ATTR_WELL_KNOWN,
672 ATTR_LOCALPREF, &tmp32, 4) == -1)
673 return -1;
674 }
675 break;
676 /*
677 * Communities are stored in struct rde_community
678 */
679 case ATTR_COMMUNITIES:
680 case ATTR_EXT_COMMUNITIES:
681 case ATTR_LARGE_COMMUNITIES:
682 if (community_writebuf(comm, type, peer->conf.ebgp,
683 buf) == -1)
684 return -1;
685 break;
686 /*
687 * NEW to OLD conversion when sending stuff to a 2byte AS peer
688 */
689 case ATTR_AS4_PATH:
690 if (neednewpath) {
691 plen = aspath_length(asp->aspath);
692 pdata = aspath_dump(asp->aspath);
693
694 flags = ATTR_OPTIONAL|ATTR_TRANSITIVE;
695 if (!(asp->flags & F_PREFIX_ANNOUNCED))
696 flags |= ATTR_PARTIAL;
697 if (plen != 0)
698 if (attr_writebuf(buf, flags,
699 ATTR_AS4_PATH, pdata, plen) == -1)
700 return -1;
701 }
702 break;
703 case ATTR_AS4_AGGREGATOR:
704 if (newaggr) {
705 flags = ATTR_OPTIONAL|ATTR_TRANSITIVE;
706 if (!(asp->flags & F_PREFIX_ANNOUNCED))
707 flags |= ATTR_PARTIAL;
708 if (attr_writebuf(buf, flags,
709 ATTR_AS4_AGGREGATOR, newaggr->data,
710 newaggr->len) == -1)
711 return -1;
712 }
713 break;
714 /*
715 * multiprotocol attributes are handled elsewhere
716 */
717 case ATTR_MP_REACH_NLRI:
718 case ATTR_MP_UNREACH_NLRI:
719 break;
720 /*
721 * dump all other path attributes. Following rules apply:
722 * 1. well-known attrs: ATTR_ATOMIC_AGGREGATE and
723 * ATTR_AGGREGATOR pass unmodified (enforce flags
724 * to correct values). Actually ATTR_AGGREGATOR may be
725 * deflated for OLD 2-byte peers.
726 * 2. non-transitive attrs: don't re-announce to ebgp peers
727 * 3. transitive known attrs: announce unmodified
728 * 4. transitive unknown attrs: set partial bit and re-announce
729 */
730 case ATTR_ATOMIC_AGGREGATE:
731 if (oa == NULL || oa->type != type)
732 break;
733 if (attr_writebuf(buf, ATTR_WELL_KNOWN,
734 ATTR_ATOMIC_AGGREGATE, NULL, 0) == -1)
735 return -1;
736 break;
737 case ATTR_AGGREGATOR:
738 if (oa == NULL || oa->type != type)
739 break;
740 if ((!(oa->flags & ATTR_TRANSITIVE)) &&
741 peer->conf.ebgp)
742 break;
743 if (!peer_has_as4byte(peer)) {
744 /* need to deflate the aggregator */
745 uint8_t t[6];
746 uint16_t tas;
747
748 if ((!(oa->flags & ATTR_TRANSITIVE)) &&
749 peer->conf.ebgp)
750 break;
751
752 memcpy(&tmp32, oa->data, sizeof(tmp32));
753 if (ntohl(tmp32) > USHRT_MAX) {
754 tas = htons(AS_TRANS);
755 newaggr = oa;
756 } else
757 tas = htons(ntohl(tmp32));
758
759 memcpy(t, &tas, sizeof(tas));
760 memcpy(t + sizeof(tas),
761 oa->data + sizeof(tmp32),
762 oa->len - sizeof(tmp32));
763 if (attr_writebuf(buf, oa->flags,
764 oa->type, &t, sizeof(t)) == -1)
765 return -1;
766 } else {
767 if (attr_writebuf(buf, oa->flags, oa->type,
768 oa->data, oa->len) == -1)
769 return -1;
770 }
771 break;
772 case ATTR_ORIGINATOR_ID:
773 case ATTR_CLUSTER_LIST:
774 case ATTR_OTC:
775 if (oa == NULL || oa->type != type)
776 break;
777 if ((!(oa->flags & ATTR_TRANSITIVE)) &&
778 peer->conf.ebgp)
779 break;
780 if (attr_writebuf(buf, oa->flags, oa->type,
781 oa->data, oa->len) == -1)
782 return -1;
783 break;
784 default:
785 if (oa == NULL && type >= ATTR_FIRST_UNKNOWN)
786 /* there is no attribute left to dump */
787 return (0);
788
789 if (oa == NULL || oa->type != type)
790 break;
791 /* unknown attribute */
792 if (!(oa->flags & ATTR_TRANSITIVE)) {
793 /*
794 * RFC 1771:
795 * Unrecognized non-transitive optional
796 * attributes must be quietly ignored and
797 * not passed along to other BGP peers.
798 */
799 break;
800 }
801 if (attr_writebuf(buf, oa->flags | ATTR_PARTIAL,
802 oa->type, oa->data, oa->len) == -1)
803 return -1;
804 }
805 }
806 return 0;
807 }
808
809 /*
810 * Check if the pending element is a EoR marker. If so remove it from the
811 * tree and return 1.
812 */
813 int
up_is_eor(struct rde_peer * peer,uint8_t aid)814 up_is_eor(struct rde_peer *peer, uint8_t aid)
815 {
816 struct prefix *p;
817
818 p = RB_MIN(prefix_tree, &peer->updates[aid]);
819 if (p != NULL && (p->flags & PREFIX_FLAG_EOR)) {
820 /*
821 * Need to remove eor from update tree because
822 * prefix_adjout_destroy() can't handle that.
823 */
824 RB_REMOVE(prefix_tree, &peer->updates[aid], p);
825 p->flags &= ~PREFIX_FLAG_UPDATE;
826 prefix_adjout_destroy(p);
827 return 1;
828 }
829 return 0;
830 }
831
/*
 * minimal buffer size > withdraw len (2) + attr len (2) + attr hdr (4) +
 * afi/safi (3), the fixed part written by up_dump_withdraws() before the
 * first prefix of a multiprotocol withdraw
 */
#define MIN_UPDATE_LEN	16
834
835 static void
up_prefix_free(struct prefix_tree * prefix_head,struct prefix * p,struct rde_peer * peer,int withdraw)836 up_prefix_free(struct prefix_tree *prefix_head, struct prefix *p,
837 struct rde_peer *peer, int withdraw)
838 {
839 if (withdraw) {
840 /* prefix no longer needed, remove it */
841 prefix_adjout_destroy(p);
842 peer->stats.prefix_sent_withdraw++;
843 } else {
844 /* prefix still in Adj-RIB-Out, keep it */
845 RB_REMOVE(prefix_tree, prefix_head, p);
846 p->flags &= ~PREFIX_FLAG_UPDATE;
847 peer->stats.pending_update--;
848 peer->stats.prefix_sent_update++;
849 }
850 }
851
852 /*
853 * Write prefixes to buffer until either there is no more space or
854 * the next prefix has no longer the same ASPATH attributes.
855 * Returns -1 if no prefix was written else 0.
856 */
857 static int
up_dump_prefix(struct ibuf * buf,struct prefix_tree * prefix_head,struct rde_peer * peer,int withdraw)858 up_dump_prefix(struct ibuf *buf, struct prefix_tree *prefix_head,
859 struct rde_peer *peer, int withdraw)
860 {
861 struct prefix *p, *np;
862 int done = 0, has_ap = -1, rv = -1;
863
864 RB_FOREACH_SAFE(p, prefix_tree, prefix_head, np) {
865 if (has_ap == -1)
866 has_ap = peer_has_add_path(peer, p->pt->aid,
867 CAPA_AP_SEND);
868 if (pt_writebuf(buf, p->pt, withdraw, has_ap, p->path_id_tx) ==
869 -1)
870 break;
871
872 /* make sure we only dump prefixes which belong together */
873 if (np == NULL ||
874 np->aspath != p->aspath ||
875 np->communities != p->communities ||
876 np->nexthop != p->nexthop ||
877 np->nhflags != p->nhflags ||
878 (np->flags & PREFIX_FLAG_EOR))
879 done = 1;
880
881 rv = 0;
882 up_prefix_free(prefix_head, p, peer, withdraw);
883 if (done)
884 break;
885 }
886 return rv;
887 }
888
889 static int
up_generate_mp_reach(struct ibuf * buf,struct rde_peer * peer,struct nexthop * nh,uint8_t aid)890 up_generate_mp_reach(struct ibuf *buf, struct rde_peer *peer,
891 struct nexthop *nh, uint8_t aid)
892 {
893 struct bgpd_addr *nexthop;
894 size_t off, nhoff;
895 uint16_t len, afi;
896 uint8_t safi;
897
898 /* attribute header, defaulting to extended length one */
899 if (ibuf_add_n8(buf, ATTR_OPTIONAL | ATTR_EXTLEN) == -1)
900 return -1;
901 if (ibuf_add_n8(buf, ATTR_MP_REACH_NLRI) == -1)
902 return -1;
903 off = ibuf_size(buf);
904 if (ibuf_add_zero(buf, sizeof(len)) == -1)
905 return -1;
906
907 if (aid2afi(aid, &afi, &safi))
908 fatalx("up_generate_mp_reach: bad AID");
909
910 /* AFI + SAFI + NH LEN + NH + Reserved */
911 if (ibuf_add_n16(buf, afi) == -1)
912 return -1;
913 if (ibuf_add_n8(buf, safi) == -1)
914 return -1;
915 nhoff = ibuf_size(buf);
916 if (ibuf_add_zero(buf, 1) == -1)
917 return -1;
918
919 if (aid == AID_VPN_IPv4 || aid == AID_VPN_IPv6) {
920 /* write zero rd */
921 if (ibuf_add_zero(buf, sizeof(uint64_t)) == -1)
922 return -1;
923 }
924
925 switch (aid) {
926 case AID_INET:
927 case AID_VPN_IPv4:
928 if (nh == NULL)
929 return -1;
930 nexthop = &nh->exit_nexthop;
931 /* AID_INET must only use this path with an IPv6 nexthop */
932 if (nexthop->aid == AID_INET && aid != AID_INET) {
933 if (ibuf_add(buf, &nexthop->v4,
934 sizeof(nexthop->v4)) == -1)
935 return -1;
936 break;
937 } else if (nexthop->aid == AID_INET6 &&
938 peer_has_ext_nexthop(peer, aid)) {
939 if (ibuf_add(buf, &nexthop->v6,
940 sizeof(nexthop->v6)) == -1)
941 return -1;
942 } else {
943 /* can't encode nexthop, give up and withdraw prefix */
944 return -1;
945 }
946 break;
947 case AID_INET6:
948 case AID_VPN_IPv6:
949 if (nh == NULL)
950 return -1;
951 nexthop = &nh->exit_nexthop;
952 if (ibuf_add(buf, &nexthop->v6, sizeof(nexthop->v6)) == -1)
953 return -1;
954 break;
955 case AID_EVPN:
956 if (nh == NULL)
957 return -1;
958 nexthop = &nh->exit_nexthop;
959 if (nexthop->aid == AID_INET) {
960 if (ibuf_add(buf, &nexthop->v4,
961 sizeof(nexthop->v4)) == -1)
962 return -1;
963 break;
964 } else if (nexthop->aid == AID_INET6) {
965 if (ibuf_add(buf, &nexthop->v6,
966 sizeof(nexthop->v6)) == -1)
967 return -1;
968 } else {
969 /* can't encode nexthop, give up and withdraw prefix */
970 return -1;
971 }
972 break;
973 case AID_FLOWSPECv4:
974 case AID_FLOWSPECv6:
975 /* no NH */
976 break;
977 default:
978 fatalx("up_generate_mp_reach: unknown AID");
979 }
980
981 /* update nexthop len */
982 len = ibuf_size(buf) - nhoff - 1;
983 if (ibuf_set_n8(buf, nhoff, len) == -1)
984 return -1;
985
986 if (ibuf_add_zero(buf, 1) == -1) /* Reserved must be 0 */
987 return -1;
988
989 if (up_dump_prefix(buf, &peer->updates[aid], peer, 0) == -1)
990 /* no prefixes written, fail update */
991 return -1;
992
993 /* update MP_REACH attribute length field */
994 len = ibuf_size(buf) - off - sizeof(len);
995 if (ibuf_set_n16(buf, off, len) == -1)
996 return -1;
997
998 return 0;
999 }
1000
1001 /*
1002 * Generate UPDATE message containing either just withdraws or updates.
 * UPDATE messages are constructed like this:
1004 *
1005 * +-----------------------------------------------------+
1006 * | Withdrawn Routes Length (2 octets) |
1007 * +-----------------------------------------------------+
1008 * | Withdrawn Routes (variable) |
1009 * +-----------------------------------------------------+
1010 * | Total Path Attribute Length (2 octets) |
1011 * +-----------------------------------------------------+
1012 * | Path Attributes (variable) |
1013 * +-----------------------------------------------------+
1014 * | Network Layer Reachability Information (variable) |
1015 * +-----------------------------------------------------+
1016 *
1017 * Multiprotocol messages use MP_REACH_NLRI and MP_UNREACH_NLRI
1018 * the latter will be the only path attribute in a message.
1019 */
1020
1021 /*
1022 * Write UPDATE message for withdrawn routes. The size of buf limits
1023 * how may routes can be added. Return 0 on success -1 on error which
1024 * includes generating an empty withdraw message.
1025 */
1026 struct ibuf *
up_dump_withdraws(struct rde_peer * peer,uint8_t aid)1027 up_dump_withdraws(struct rde_peer *peer, uint8_t aid)
1028 {
1029 struct ibuf *buf;
1030 size_t off, pkgsize = MAX_PKTSIZE;
1031 uint16_t afi, len;
1032 uint8_t safi;
1033
1034 if (peer_has_ext_msg(peer))
1035 pkgsize = MAX_EXT_PKTSIZE;
1036
1037 if ((buf = ibuf_dynamic(4, pkgsize - MSGSIZE_HEADER)) == NULL)
1038 goto fail;
1039
1040 /* reserve space for the withdrawn routes length field */
1041 off = ibuf_size(buf);
1042 if (ibuf_add_zero(buf, sizeof(len)) == -1)
1043 goto fail;
1044
1045 if (aid != AID_INET) {
1046 /* reserve space for 2-byte path attribute length */
1047 off = ibuf_size(buf);
1048 if (ibuf_add_zero(buf, sizeof(len)) == -1)
1049 goto fail;
1050
1051 /* attribute header, defaulting to extended length one */
1052 if (ibuf_add_n8(buf, ATTR_OPTIONAL | ATTR_EXTLEN) == -1)
1053 goto fail;
1054 if (ibuf_add_n8(buf, ATTR_MP_UNREACH_NLRI) == -1)
1055 goto fail;
1056 if (ibuf_add_zero(buf, sizeof(len)) == -1)
1057 goto fail;
1058
1059 /* afi & safi */
1060 if (aid2afi(aid, &afi, &safi))
1061 fatalx("%s: bad AID", __func__);
1062 if (ibuf_add_n16(buf, afi) == -1)
1063 goto fail;
1064 if (ibuf_add_n8(buf, safi) == -1)
1065 goto fail;
1066 }
1067
1068 if (up_dump_prefix(buf, &peer->withdraws[aid], peer, 1) == -1)
1069 goto fail;
1070
1071 /* update length field (either withdrawn routes or attribute length) */
1072 len = ibuf_size(buf) - off - sizeof(len);
1073 if (ibuf_set_n16(buf, off, len) == -1)
1074 goto fail;
1075
1076 if (aid != AID_INET) {
1077 /* write MP_UNREACH_NLRI attribute length (always extended) */
1078 len -= 4; /* skip attribute header */
1079 if (ibuf_set_n16(buf, off + sizeof(len) + 2, len) == -1)
1080 goto fail;
1081 } else {
1082 /* no extra attributes so set attribute len to 0 */
1083 if (ibuf_add_zero(buf, sizeof(len)) == -1) {
1084 goto fail;
1085 }
1086 }
1087
1088 return buf;
1089
1090 fail:
1091 /* something went horribly wrong */
1092 log_peer_warn(&peer->conf, "generating withdraw failed, peer desynced");
1093 ibuf_free(buf);
1094 return NULL;
1095 }
1096
1097 /*
1098 * Withdraw a single prefix after an error.
1099 */
1100 static struct ibuf *
up_dump_withdraw_one(struct rde_peer * peer,struct prefix * p,struct ibuf * buf)1101 up_dump_withdraw_one(struct rde_peer *peer, struct prefix *p, struct ibuf *buf)
1102 {
1103 size_t off;
1104 int has_ap;
1105 uint16_t afi, len;
1106 uint8_t safi;
1107
1108 /* reset the buffer and start fresh */
1109 ibuf_truncate(buf, 0);
1110
1111 /* reserve space for the withdrawn routes length field */
1112 off = ibuf_size(buf);
1113 if (ibuf_add_zero(buf, sizeof(len)) == -1)
1114 goto fail;
1115
1116 if (p->pt->aid != AID_INET) {
1117 /* reserve space for 2-byte path attribute length */
1118 off = ibuf_size(buf);
1119 if (ibuf_add_zero(buf, sizeof(len)) == -1)
1120 goto fail;
1121
1122 /* attribute header, defaulting to extended length one */
1123 if (ibuf_add_n8(buf, ATTR_OPTIONAL | ATTR_EXTLEN) == -1)
1124 goto fail;
1125 if (ibuf_add_n8(buf, ATTR_MP_UNREACH_NLRI) == -1)
1126 goto fail;
1127 if (ibuf_add_zero(buf, sizeof(len)) == -1)
1128 goto fail;
1129
1130 /* afi & safi */
1131 if (aid2afi(p->pt->aid, &afi, &safi))
1132 fatalx("%s: bad AID", __func__);
1133 if (ibuf_add_n16(buf, afi) == -1)
1134 goto fail;
1135 if (ibuf_add_n8(buf, safi) == -1)
1136 goto fail;
1137 }
1138
1139 has_ap = peer_has_add_path(peer, p->pt->aid, CAPA_AP_SEND);
1140 if (pt_writebuf(buf, p->pt, 1, has_ap, p->path_id_tx) == -1)
1141 goto fail;
1142
1143 /* update length field (either withdrawn routes or attribute length) */
1144 len = ibuf_size(buf) - off - sizeof(len);
1145 if (ibuf_set_n16(buf, off, len) == -1)
1146 goto fail;
1147
1148 if (p->pt->aid != AID_INET) {
1149 /* write MP_UNREACH_NLRI attribute length (always extended) */
1150 len -= 4; /* skip attribute header */
1151 if (ibuf_set_n16(buf, off + sizeof(len) + 2, len) == -1)
1152 goto fail;
1153 } else {
1154 /* no extra attributes so set attribute len to 0 */
1155 if (ibuf_add_zero(buf, sizeof(len)) == -1) {
1156 goto fail;
1157 }
1158 }
1159
1160 return buf;
1161
1162 fail:
1163 /* something went horribly wrong */
1164 log_peer_warn(&peer->conf, "generating withdraw failed, peer desynced");
1165 ibuf_free(buf);
1166 return NULL;
1167 }
1168
1169 /*
1170 * Write UPDATE message for changed and added routes. The size of buf limits
1171 * how may routes can be added. The function first dumps the path attributes
1172 * and then tries to add as many prefixes using these attributes.
1173 * Return 0 on success -1 on error which includes producing an empty message.
1174 */
1175 struct ibuf *
up_dump_update(struct rde_peer * peer,uint8_t aid)1176 up_dump_update(struct rde_peer *peer, uint8_t aid)
1177 {
1178 struct ibuf *buf;
1179 struct bgpd_addr addr;
1180 struct prefix *p;
1181 size_t off, pkgsize = MAX_PKTSIZE;
1182 uint16_t len;
1183 int force_ip4mp = 0;
1184
1185 p = RB_MIN(prefix_tree, &peer->updates[aid]);
1186 if (p == NULL)
1187 return NULL;
1188
1189 if (peer_has_ext_msg(peer))
1190 pkgsize = MAX_EXT_PKTSIZE;
1191
1192 if (aid == AID_INET && peer_has_ext_nexthop(peer, AID_INET)) {
1193 struct nexthop *nh = prefix_nexthop(p);
1194 if (nh != NULL && nh->exit_nexthop.aid == AID_INET6)
1195 force_ip4mp = 1;
1196 }
1197
1198 if ((buf = ibuf_dynamic(4, pkgsize - MSGSIZE_HEADER)) == NULL)
1199 goto fail;
1200
1201 /* withdrawn routes length field is 0 */
1202 if (ibuf_add_zero(buf, sizeof(len)) == -1)
1203 goto fail;
1204
1205 /* reserve space for 2-byte path attribute length */
1206 off = ibuf_size(buf);
1207 if (ibuf_add_zero(buf, sizeof(len)) == -1)
1208 goto fail;
1209
1210 if (up_generate_attr(buf, peer, prefix_aspath(p),
1211 prefix_communities(p), prefix_nexthop(p), aid) == -1)
1212 goto drop;
1213
1214 if (aid != AID_INET || force_ip4mp) {
1215 /* write mp attribute including nlri */
1216
1217 /*
1218 * RFC 7606 wants this to be first but then we need
1219 * to use multiple buffers with adjusted length to
1220 * merge the attributes together in reverse order of
1221 * creation.
1222 */
1223 if (up_generate_mp_reach(buf, peer, prefix_nexthop(p), aid) ==
1224 -1)
1225 goto drop;
1226 }
1227
1228 /* update attribute length field */
1229 len = ibuf_size(buf) - off - sizeof(len);
1230 if (ibuf_set_n16(buf, off, len) == -1)
1231 goto fail;
1232
1233 if (aid == AID_INET && !force_ip4mp) {
1234 /* last but not least dump the IPv4 nlri */
1235 if (up_dump_prefix(buf, &peer->updates[aid], peer, 0) == -1)
1236 goto drop;
1237 }
1238
1239 return buf;
1240
1241 drop:
1242 /* Not enough space. Drop current prefix, it will never fit. */
1243 p = RB_MIN(prefix_tree, &peer->updates[aid]);
1244 pt_getaddr(p->pt, &addr);
1245 log_peer_warnx(&peer->conf, "generating update failed, "
1246 "prefix %s/%d dropped", log_addr(&addr), p->pt->prefixlen);
1247
1248 up_prefix_free(&peer->updates[aid], p, peer, 0);
1249 return up_dump_withdraw_one(peer, p, buf);
1250
1251 fail:
1252 /* something went horribly wrong */
1253 log_peer_warn(&peer->conf, "generating update failed, peer desynced");
1254 ibuf_free(buf);
1255 return NULL;
1256 }
1257