br_if.c revision c835a677331495cf137a7f8a023463afd9f032f8
1/*
2 *	Userspace interface
3 *	Linux ethernet bridge
4 *
5 *	Authors:
6 *	Lennert Buytenhek		<buytenh@gnu.org>
7 *
8 *	This program is free software; you can redistribute it and/or
9 *	modify it under the terms of the GNU General Public License
10 *	as published by the Free Software Foundation; either version
11 *	2 of the License, or (at your option) any later version.
12 */
13
14#include <linux/kernel.h>
15#include <linux/netdevice.h>
16#include <linux/etherdevice.h>
17#include <linux/netpoll.h>
18#include <linux/ethtool.h>
19#include <linux/if_arp.h>
20#include <linux/module.h>
21#include <linux/init.h>
22#include <linux/rtnetlink.h>
23#include <linux/if_ether.h>
24#include <linux/slab.h>
25#include <net/sock.h>
26#include <linux/if_vlan.h>
27
28#include "br_private.h"
29
30/*
31 * Determine initial path cost based on speed.
32 * using recommendations from 802.1d standard
33 *
34 * Since driver might sleep need to not be holding any locks.
35 */
36static int port_cost(struct net_device *dev)
37{
38	struct ethtool_cmd ecmd;
39
40	if (!__ethtool_get_settings(dev, &ecmd)) {
41		switch (ethtool_cmd_speed(&ecmd)) {
42		case SPEED_10000:
43			return 2;
44		case SPEED_1000:
45			return 4;
46		case SPEED_100:
47			return 19;
48		case SPEED_10:
49			return 100;
50		}
51	}
52
53	/* Old silly heuristics based on name */
54	if (!strncmp(dev->name, "lec", 3))
55		return 7;
56
57	if (!strncmp(dev->name, "plip", 4))
58		return 2500;
59
60	return 100;	/* assume old 10Mbps */
61}
62
63
64/* Check for port carrier transitions. */
65void br_port_carrier_check(struct net_bridge_port *p)
66{
67	struct net_device *dev = p->dev;
68	struct net_bridge *br = p->br;
69
70	if (!(p->flags & BR_ADMIN_COST) &&
71	    netif_running(dev) && netif_oper_up(dev))
72		p->path_cost = port_cost(dev);
73
74	if (!netif_running(br->dev))
75		return;
76
77	spin_lock_bh(&br->lock);
78	if (netif_running(dev) && netif_oper_up(dev)) {
79		if (p->state == BR_STATE_DISABLED)
80			br_stp_enable_port(p);
81	} else {
82		if (p->state != BR_STATE_DISABLED)
83			br_stp_disable_port(p);
84	}
85	spin_unlock_bh(&br->lock);
86}
87
88static void br_port_set_promisc(struct net_bridge_port *p)
89{
90	int err = 0;
91
92	if (br_promisc_port(p))
93		return;
94
95	err = dev_set_promiscuity(p->dev, 1);
96	if (err)
97		return;
98
99	br_fdb_unsync_static(p->br, p);
100	p->flags |= BR_PROMISC;
101}
102
103static void br_port_clear_promisc(struct net_bridge_port *p)
104{
105	int err;
106
107	/* Check if the port is already non-promisc or if it doesn't
108	 * support UNICAST filtering.  Without unicast filtering support
109	 * we'll end up re-enabling promisc mode anyway, so just check for
110	 * it here.
111	 */
112	if (!br_promisc_port(p) || !(p->dev->priv_flags & IFF_UNICAST_FLT))
113		return;
114
115	/* Since we'll be clearing the promisc mode, program the port
116	 * first so that we don't have interruption in traffic.
117	 */
118	err = br_fdb_sync_static(p->br, p);
119	if (err)
120		return;
121
122	dev_set_promiscuity(p->dev, -1);
123	p->flags &= ~BR_PROMISC;
124}
125
126/* When a port is added or removed or when certain port flags
127 * change, this function is called to automatically manage
128 * promiscuity setting of all the bridge ports.  We are always called
129 * under RTNL so can skip using rcu primitives.
130 */
131void br_manage_promisc(struct net_bridge *br)
132{
133	struct net_bridge_port *p;
134	bool set_all = false;
135
136	/* If vlan filtering is disabled or bridge interface is placed
137	 * into promiscuous mode, place all ports in promiscuous mode.
138	 */
139	if ((br->dev->flags & IFF_PROMISC) || !br_vlan_enabled(br))
140		set_all = true;
141
142	list_for_each_entry(p, &br->port_list, list) {
143		if (set_all) {
144			br_port_set_promisc(p);
145		} else {
146			/* If the number of auto-ports is <= 1, then all other
147			 * ports will have their output configuration
148			 * statically specified through fdbs.  Since ingress
149			 * on the auto-port becomes forwarding/egress to other
150			 * ports and egress configuration is statically known,
151			 * we can say that ingress configuration of the
152			 * auto-port is also statically known.
153			 * This lets us disable promiscuous mode and write
154			 * this config to hw.
155			 */
156			if (br->auto_cnt == 0 ||
157			    (br->auto_cnt == 1 && br_auto_port(p)))
158				br_port_clear_promisc(p);
159			else
160				br_port_set_promisc(p);
161		}
162	}
163}
164
165static void nbp_update_port_count(struct net_bridge *br)
166{
167	struct net_bridge_port *p;
168	u32 cnt = 0;
169
170	list_for_each_entry(p, &br->port_list, list) {
171		if (br_auto_port(p))
172			cnt++;
173	}
174	if (br->auto_cnt != cnt) {
175		br->auto_cnt = cnt;
176		br_manage_promisc(br);
177	}
178}
179
180static void nbp_delete_promisc(struct net_bridge_port *p)
181{
182	/* If port is currently promiscuous, unset promiscuity.
183	 * Otherwise, it is a static port so remove all addresses
184	 * from it.
185	 */
186	dev_set_allmulti(p->dev, -1);
187	if (br_promisc_port(p))
188		dev_set_promiscuity(p->dev, -1);
189	else
190		br_fdb_unsync_static(p->br, p);
191}
192
193static void release_nbp(struct kobject *kobj)
194{
195	struct net_bridge_port *p
196		= container_of(kobj, struct net_bridge_port, kobj);
197	kfree(p);
198}
199
200static struct kobj_type brport_ktype = {
201#ifdef CONFIG_SYSFS
202	.sysfs_ops = &brport_sysfs_ops,
203#endif
204	.release = release_nbp,
205};
206
207static void destroy_nbp(struct net_bridge_port *p)
208{
209	struct net_device *dev = p->dev;
210
211	p->br = NULL;
212	p->dev = NULL;
213	dev_put(dev);
214
215	kobject_put(&p->kobj);
216}
217
218static void destroy_nbp_rcu(struct rcu_head *head)
219{
220	struct net_bridge_port *p =
221			container_of(head, struct net_bridge_port, rcu);
222	destroy_nbp(p);
223}
224
225/* Delete port(interface) from bridge is done in two steps.
226 * via RCU. First step, marks device as down. That deletes
227 * all the timers and stops new packets from flowing through.
228 *
229 * Final cleanup doesn't occur until after all CPU's finished
230 * processing packets.
231 *
232 * Protected from multiple admin operations by RTNL mutex
233 */
234static void del_nbp(struct net_bridge_port *p)
235{
236	struct net_bridge *br = p->br;
237	struct net_device *dev = p->dev;
238
239	sysfs_remove_link(br->ifobj, p->dev->name);
240
241	nbp_delete_promisc(p);
242
243	spin_lock_bh(&br->lock);
244	br_stp_disable_port(p);
245	spin_unlock_bh(&br->lock);
246
247	br_ifinfo_notify(RTM_DELLINK, p);
248
249	list_del_rcu(&p->list);
250
251	nbp_vlan_flush(p);
252	br_fdb_delete_by_port(br, p, 1);
253	nbp_update_port_count(br);
254
255	dev->priv_flags &= ~IFF_BRIDGE_PORT;
256
257	netdev_rx_handler_unregister(dev);
258
259	netdev_upper_dev_unlink(dev, br->dev);
260
261	br_multicast_del_port(p);
262
263	kobject_uevent(&p->kobj, KOBJ_REMOVE);
264	kobject_del(&p->kobj);
265
266	br_netpoll_disable(p);
267
268	call_rcu(&p->rcu, destroy_nbp_rcu);
269}
270
271/* Delete bridge device */
272void br_dev_delete(struct net_device *dev, struct list_head *head)
273{
274	struct net_bridge *br = netdev_priv(dev);
275	struct net_bridge_port *p, *n;
276
277	list_for_each_entry_safe(p, n, &br->port_list, list) {
278		del_nbp(p);
279	}
280
281	br_fdb_delete_by_port(br, NULL, 1);
282
283	br_vlan_flush(br);
284	del_timer_sync(&br->gc_timer);
285
286	br_sysfs_delbr(br->dev);
287	unregister_netdevice_queue(br->dev, head);
288}
289
290/* find an available port number */
291static int find_portno(struct net_bridge *br)
292{
293	int index;
294	struct net_bridge_port *p;
295	unsigned long *inuse;
296
297	inuse = kcalloc(BITS_TO_LONGS(BR_MAX_PORTS), sizeof(unsigned long),
298			GFP_KERNEL);
299	if (!inuse)
300		return -ENOMEM;
301
302	set_bit(0, inuse);	/* zero is reserved */
303	list_for_each_entry(p, &br->port_list, list) {
304		set_bit(p->port_no, inuse);
305	}
306	index = find_first_zero_bit(inuse, BR_MAX_PORTS);
307	kfree(inuse);
308
309	return (index >= BR_MAX_PORTS) ? -EXFULL : index;
310}
311
312/* called with RTNL but without bridge lock */
313static struct net_bridge_port *new_nbp(struct net_bridge *br,
314				       struct net_device *dev)
315{
316	int index;
317	struct net_bridge_port *p;
318
319	index = find_portno(br);
320	if (index < 0)
321		return ERR_PTR(index);
322
323	p = kzalloc(sizeof(*p), GFP_KERNEL);
324	if (p == NULL)
325		return ERR_PTR(-ENOMEM);
326
327	p->br = br;
328	dev_hold(dev);
329	p->dev = dev;
330	p->path_cost = port_cost(dev);
331	p->priority = 0x8000 >> BR_PORT_BITS;
332	p->port_no = index;
333	p->flags = BR_LEARNING | BR_FLOOD;
334	br_init_port(p);
335	p->state = BR_STATE_DISABLED;
336	br_stp_port_timer_init(p);
337	br_multicast_add_port(p);
338
339	return p;
340}
341
342int br_add_bridge(struct net *net, const char *name)
343{
344	struct net_device *dev;
345	int res;
346
347	dev = alloc_netdev(sizeof(struct net_bridge), name, NET_NAME_UNKNOWN,
348			   br_dev_setup);
349
350	if (!dev)
351		return -ENOMEM;
352
353	dev_net_set(dev, net);
354	dev->rtnl_link_ops = &br_link_ops;
355
356	res = register_netdev(dev);
357	if (res)
358		free_netdev(dev);
359	return res;
360}
361
362int br_del_bridge(struct net *net, const char *name)
363{
364	struct net_device *dev;
365	int ret = 0;
366
367	rtnl_lock();
368	dev = __dev_get_by_name(net, name);
369	if (dev == NULL)
370		ret =  -ENXIO; 	/* Could not find device */
371
372	else if (!(dev->priv_flags & IFF_EBRIDGE)) {
373		/* Attempt to delete non bridge device! */
374		ret = -EPERM;
375	}
376
377	else if (dev->flags & IFF_UP) {
378		/* Not shutdown yet. */
379		ret = -EBUSY;
380	}
381
382	else
383		br_dev_delete(dev, NULL);
384
385	rtnl_unlock();
386	return ret;
387}
388
389/* MTU of the bridge pseudo-device: ETH_DATA_LEN or the minimum of the ports */
390int br_min_mtu(const struct net_bridge *br)
391{
392	const struct net_bridge_port *p;
393	int mtu = 0;
394
395	ASSERT_RTNL();
396
397	if (list_empty(&br->port_list))
398		mtu = ETH_DATA_LEN;
399	else {
400		list_for_each_entry(p, &br->port_list, list) {
401			if (!mtu  || p->dev->mtu < mtu)
402				mtu = p->dev->mtu;
403		}
404	}
405	return mtu;
406}
407
408/*
409 * Recomputes features using slave's features
410 */
411netdev_features_t br_features_recompute(struct net_bridge *br,
412	netdev_features_t features)
413{
414	struct net_bridge_port *p;
415	netdev_features_t mask;
416
417	if (list_empty(&br->port_list))
418		return features;
419
420	mask = features;
421	features &= ~NETIF_F_ONE_FOR_ALL;
422
423	list_for_each_entry(p, &br->port_list, list) {
424		features = netdev_increment_features(features,
425						     p->dev->features, mask);
426	}
427
428	return features;
429}
430
431/* called with RTNL */
432int br_add_if(struct net_bridge *br, struct net_device *dev)
433{
434	struct net_bridge_port *p;
435	int err = 0;
436	bool changed_addr;
437
438	/* Don't allow bridging non-ethernet like devices */
439	if ((dev->flags & IFF_LOOPBACK) ||
440	    dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN ||
441	    !is_valid_ether_addr(dev->dev_addr))
442		return -EINVAL;
443
444	/* No bridging of bridges */
445	if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit)
446		return -ELOOP;
447
448	/* Device is already being bridged */
449	if (br_port_exists(dev))
450		return -EBUSY;
451
452	/* No bridging devices that dislike that (e.g. wireless) */
453	if (dev->priv_flags & IFF_DONT_BRIDGE)
454		return -EOPNOTSUPP;
455
456	p = new_nbp(br, dev);
457	if (IS_ERR(p))
458		return PTR_ERR(p);
459
460	call_netdevice_notifiers(NETDEV_JOIN, dev);
461
462	err = dev_set_allmulti(dev, 1);
463	if (err)
464		goto put_back;
465
466	err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj),
467				   SYSFS_BRIDGE_PORT_ATTR);
468	if (err)
469		goto err1;
470
471	err = br_sysfs_addif(p);
472	if (err)
473		goto err2;
474
475	err = br_netpoll_enable(p);
476	if (err)
477		goto err3;
478
479	err = netdev_master_upper_dev_link(dev, br->dev);
480	if (err)
481		goto err4;
482
483	err = netdev_rx_handler_register(dev, br_handle_frame, p);
484	if (err)
485		goto err5;
486
487	dev->priv_flags |= IFF_BRIDGE_PORT;
488
489	dev_disable_lro(dev);
490
491	list_add_rcu(&p->list, &br->port_list);
492
493	nbp_update_port_count(br);
494
495	netdev_update_features(br->dev);
496
497	if (br->dev->needed_headroom < dev->needed_headroom)
498		br->dev->needed_headroom = dev->needed_headroom;
499
500	if (br_fdb_insert(br, p, dev->dev_addr, 0))
501		netdev_err(dev, "failed insert local address bridge forwarding table\n");
502
503	spin_lock_bh(&br->lock);
504	changed_addr = br_stp_recalculate_bridge_id(br);
505
506	if (netif_running(dev) && netif_oper_up(dev) &&
507	    (br->dev->flags & IFF_UP))
508		br_stp_enable_port(p);
509	spin_unlock_bh(&br->lock);
510
511	br_ifinfo_notify(RTM_NEWLINK, p);
512
513	if (changed_addr)
514		call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
515
516	dev_set_mtu(br->dev, br_min_mtu(br));
517
518	kobject_uevent(&p->kobj, KOBJ_ADD);
519
520	return 0;
521
522err5:
523	netdev_upper_dev_unlink(dev, br->dev);
524err4:
525	br_netpoll_disable(p);
526err3:
527	sysfs_remove_link(br->ifobj, p->dev->name);
528err2:
529	kobject_put(&p->kobj);
530	p = NULL; /* kobject_put frees */
531err1:
532	dev_set_allmulti(dev, -1);
533put_back:
534	dev_put(dev);
535	kfree(p);
536	return err;
537}
538
539/* called with RTNL */
540int br_del_if(struct net_bridge *br, struct net_device *dev)
541{
542	struct net_bridge_port *p;
543	bool changed_addr;
544
545	p = br_port_get_rtnl(dev);
546	if (!p || p->br != br)
547		return -EINVAL;
548
549	/* Since more than one interface can be attached to a bridge,
550	 * there still maybe an alternate path for netconsole to use;
551	 * therefore there is no reason for a NETDEV_RELEASE event.
552	 */
553	del_nbp(p);
554
555	spin_lock_bh(&br->lock);
556	changed_addr = br_stp_recalculate_bridge_id(br);
557	spin_unlock_bh(&br->lock);
558
559	if (changed_addr)
560		call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
561
562	netdev_update_features(br->dev);
563
564	return 0;
565}
566
567void br_port_flags_change(struct net_bridge_port *p, unsigned long mask)
568{
569	struct net_bridge *br = p->br;
570
571	if (mask & BR_AUTO_MASK)
572		nbp_update_port_count(br);
573}
574