net: introduce ethernet teaming device

This patch introduces a new network device called team. It is supposed to be
a very fast, simple, userspace-driven alternative to the existing bonding
driver.

A userspace library called libteam, with a couple of demo apps, is available
here:
https://github.com/jpirko/libteam
Note it's still in its diapers atm.

team<->libteam use generic netlink for communication. That and rtnl
are supposed to be the only ways to configure a team device, no sysfs etc.

Python binding of libteam was recently introduced.
A daemon providing arpmon/miimon active-backup functionality will be
introduced shortly. Everything necessary is already implemented in the
kernel team driver.

v7->v8:
	- check ndo_vlan_rx_[add/kill]_vid functions before calling
	  them.
	- use dev_kfree_skb_any() instead of dev_kfree_skb()

v6->v7:
	- transmit and receive functions are not checked in hot paths.
	  That also resolves memory leak on transmit when no port is
	  present

v5->v6:
	- changed couple of _rcu calls to non _rcu ones in non-readers

v4->v5:
	- team_change_mtu() uses team->lock while traversing through port
	  list
	- mac address changes are moved completely to jurisdiction of
	  userspace daemon. This way the daemon can do FOM1, FOM2 and
	  possibly other weird things with mac addresses.
	  Only round-robin mode sets up all ports to the team's address when
	  enslaved.
	- Extended Kconfig text

v3->v4:
	- remove redundant synchronize_rcu from __team_change_mode()
	- revert "set and clear of mode_ops happens per pointer, not per
	  byte"
	- extend comment of function __team_change_mode()

v2->v3:
	- team_change_mtu() uses rcu version of list traversal to unwind
	- set and clear of mode_ops happens per pointer, not per byte
	- port hashlist changed to be embedded into team structure
	- error branch in team_port_enter() does cleanup now
	- fixed rtln->rtnl

v1->v2:
	- modes are made as modules. Makes team more modular and
	  extendable.
	- several commenters' nitpicks found on v1 were fixed
	- several other bugs were fixed.
	- note I ignored Eric's comment about roundrobin port selector
	  as Eric's way may be easily implemented as another mode (mode
	  "random") in future.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Jiri Pirko 2011-11-11 22:16:48 +00:00 committed by David S. Miller
parent 5d70b88cd4
commit 3d249d4ca7
12 changed files with 2133 additions and 0 deletions

View File

@ -0,0 +1,2 @@
Team devices are driven from userspace via the libteam library, which is available here:
https://github.com/jpirko/libteam

View File

@ -6484,6 +6484,13 @@ W: http://tcp-lp-mod.sourceforge.net/
S: Maintained S: Maintained
F: net/ipv4/tcp_lp.c F: net/ipv4/tcp_lp.c
TEAM DRIVER
M: Jiri Pirko <jpirko@redhat.com>
L: netdev@vger.kernel.org
S: Supported
F: drivers/net/team/
F: include/linux/if_team.h
TEGRA SUPPORT TEGRA SUPPORT
M: Colin Cross <ccross@android.com> M: Colin Cross <ccross@android.com>
M: Olof Johansson <olof@lixom.net> M: Olof Johansson <olof@lixom.net>

View File

@ -125,6 +125,8 @@ config IFB
'ifb1' etc. 'ifb1' etc.
Look at the iproute2 documentation directory for usage etc Look at the iproute2 documentation directory for usage etc
source "drivers/net/team/Kconfig"
config MACVLAN config MACVLAN
tristate "MAC-VLAN support (EXPERIMENTAL)" tristate "MAC-VLAN support (EXPERIMENTAL)"
depends on EXPERIMENTAL depends on EXPERIMENTAL

View File

@ -17,6 +17,7 @@ obj-$(CONFIG_NET) += Space.o loopback.o
obj-$(CONFIG_NETCONSOLE) += netconsole.o obj-$(CONFIG_NETCONSOLE) += netconsole.o
obj-$(CONFIG_PHYLIB) += phy/ obj-$(CONFIG_PHYLIB) += phy/
obj-$(CONFIG_RIONET) += rionet.o obj-$(CONFIG_RIONET) += rionet.o
obj-$(CONFIG_NET_TEAM) += team/
obj-$(CONFIG_TUN) += tun.o obj-$(CONFIG_TUN) += tun.o
obj-$(CONFIG_VETH) += veth.o obj-$(CONFIG_VETH) += veth.o
obj-$(CONFIG_VIRTIO_NET) += virtio_net.o obj-$(CONFIG_VIRTIO_NET) += virtio_net.o

43
drivers/net/team/Kconfig Normal file
View File

@ -0,0 +1,43 @@
menuconfig NET_TEAM
tristate "Ethernet team driver support (EXPERIMENTAL)"
depends on EXPERIMENTAL
---help---
This allows one to create virtual interfaces that team together
multiple ethernet devices.
Team devices can be added using the "ip" command from the
iproute2 package:
"ip link add link [ address MAC ] [ NAME ] type team"
To compile this driver as a module, choose M here: the module
will be called team.
if NET_TEAM
config NET_TEAM_MODE_ROUNDROBIN
tristate "Round-robin mode support"
depends on NET_TEAM
---help---
Basic mode where port used for transmitting packets is selected in
round-robin fashion using packet counter.
All added ports are set up to have the team's MAC address.
To compile this team mode as a module, choose M here: the module
will be called team_mode_roundrobin.
config NET_TEAM_MODE_ACTIVEBACKUP
tristate "Active-backup mode support"
depends on NET_TEAM
---help---
Only one port is active at a time and the rest of ports are used
for backup.
MAC addresses of ports are not modified. Userspace is responsible
for doing so.
To compile this team mode as a module, choose M here: the module
will be called team_mode_activebackup.
endif # NET_TEAM

View File

@ -0,0 +1,7 @@
#
# Makefile for the network team driver
#
obj-$(CONFIG_NET_TEAM) += team.o
obj-$(CONFIG_NET_TEAM_MODE_ROUNDROBIN) += team_mode_roundrobin.o
obj-$(CONFIG_NET_TEAM_MODE_ACTIVEBACKUP) += team_mode_activebackup.o

1583
drivers/net/team/team.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,137 @@
/*
* drivers/net/team/team_mode_activebackup.c - Active-backup mode for team
* Copyright (c) 2011 Jiri Pirko <jpirko@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <net/rtnetlink.h>
#include <linux/if_team.h>
/*
 * Private state of the active-backup mode. It lives inside the team's
 * mode_priv storage and is reached through the ab_priv() accessor.
 */
struct ab_priv {
/* RCU-managed pointer to the port selected as active; NULL when none is set. */
struct team_port __rcu *active_port;
};
/*
 * Return the active-backup private data of @team by reinterpreting the
 * team's generic mode_priv storage as a struct ab_priv.
 */
static struct ab_priv *ab_priv(struct team *team)
{
return (struct ab_priv *) &team->mode_priv;
}
/*
 * Receive callback of the active-backup mode.
 *
 * Returns RX_HANDLER_ANOTHER when @port is the currently active port and
 * RX_HANDLER_EXACT for any other port. @skb itself is not touched here.
 */
static rx_handler_result_t ab_receive(struct team *team, struct team_port *port,
				      struct sk_buff *skb)
{
	struct team_port *current_active;

	current_active = rcu_dereference(ab_priv(team)->active_port);

	return (current_active == port) ? RX_HANDLER_ANOTHER : RX_HANDLER_EXACT;
}
/*
 * Transmit callback of the active-backup mode.
 *
 * Queues @skb on the device of the active port. Returns true when
 * dev_queue_xmit() reports success (0) and false otherwise. If no active
 * port is set, the skb is freed and false is returned.
 */
static bool ab_transmit(struct team *team, struct sk_buff *skb)
{
	struct team_port *tx_port = rcu_dereference(ab_priv(team)->active_port);

	if (unlikely(!tx_port)) {
		/* Nothing to send on: release the buffer ourselves. */
		dev_kfree_skb_any(skb);
		return false;
	}

	skb->dev = tx_port->dev;
	return !dev_queue_xmit(skb);
}
static void ab_port_leave(struct team *team, struct team_port *port)
{
if (ab_priv(team)->active_port == port)
rcu_assign_pointer(ab_priv(team)->active_port, NULL);
}
static int ab_active_port_get(struct team *team, void *arg)
{
u32 *ifindex = arg;
*ifindex = 0;
if (ab_priv(team)->active_port)
*ifindex = ab_priv(team)->active_port->dev->ifindex;
return 0;
}
/*
 * Setter of the "activeport" option.
 *
 * @arg points to a u32 holding the ifindex of the device that should become
 * the active port. The team's port list is searched for a port with that
 * ifindex; on a match it is published as the active port and 0 is returned.
 * Returns -ENOENT when no port of this team has that ifindex.
 */
static int ab_active_port_set(struct team *team, void *arg)
{
	u32 *wanted = arg;
	struct team_port *candidate;

	list_for_each_entry_rcu(candidate, &team->port_list, list) {
		if (candidate->dev->ifindex != *wanted)
			continue;

		rcu_assign_pointer(ab_priv(team)->active_port, candidate);
		return 0;
	}

	return -ENOENT;
}
/*
 * Options exported by the active-backup mode. They are registered with the
 * team in ab_init() and removed again in ab_exit().
 */
static struct team_option ab_options[] = {
{
/* u32 option: ifindex of the active port (0 is reported when none is set) */
.name = "activeport",
.type = TEAM_OPTION_TYPE_U32,
.getter = ab_active_port_get,
.setter = ab_active_port_set,
},
};
/*
 * Mode init callback: register the active-backup options with @team.
 * Always returns 0.
 *
 * Kept static: it is only reachable through ab_mode_ops in this file, so it
 * must not leak a generic name such as "ab_init" into the global namespace.
 */
static int ab_init(struct team *team)
{
	team_options_register(team, ab_options, ARRAY_SIZE(ab_options));
	return 0;
}
/*
 * Mode exit callback: unregister the active-backup options from @team.
 *
 * Kept static: it is only reachable through ab_mode_ops in this file, so it
 * must not leak a generic name such as "ab_exit" into the global namespace.
 */
static void ab_exit(struct team *team)
{
	team_options_unregister(team, ab_options, ARRAY_SIZE(ab_options));
}
/*
 * Callback table of the active-backup mode. port_enter and port_change_mac
 * are intentionally left unset; this mode does not provide them.
 */
static const struct team_mode_ops ab_mode_ops = {
.init = ab_init,
.exit = ab_exit,
.receive = ab_receive,
.transmit = ab_transmit,
.port_leave = ab_port_leave,
};
/*
 * Mode descriptor handed to team_mode_register(). "activebackup" is the
 * kind string identifying this mode; priv_size announces how much of the
 * team's mode_priv storage the mode uses.
 */
static struct team_mode ab_mode = {
.kind = "activebackup",
.owner = THIS_MODULE,
.priv_size = sizeof(struct ab_priv),
.ops = &ab_mode_ops,
};
/* Module entry point: register the active-backup mode; returns the result of team_mode_register(). */
static int __init ab_init_module(void)
{
return team_mode_register(&ab_mode);
}
/* Module exit point: unregister the active-backup mode (the return value of team_mode_unregister() is not checked). */
static void __exit ab_cleanup_module(void)
{
team_mode_unregister(&ab_mode);
}
/* Module hooks and metadata. */
module_init(ab_init_module);
module_exit(ab_cleanup_module);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Jiri Pirko <jpirko@redhat.com>");
MODULE_DESCRIPTION("Active-backup mode for team");
/* NOTE(review): presumably the alias the team core requests to autoload this mode - confirm in team.c */
MODULE_ALIAS("team-mode-activebackup");

View File

@ -0,0 +1,107 @@
/*
* drivers/net/team/team_mode_roundrobin.c - Round-robin mode for team
* Copyright (c) 2011 Jiri Pirko <jpirko@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/if_team.h>
/*
 * Private state of the round-robin mode. It lives inside the team's
 * mode_priv storage and is reached through the rr_priv() accessor.
 */
struct rr_priv {
/* Running counter bumped by rr_transmit(); used modulo port count to pick the next port. */
unsigned int sent_packets;
};
/*
 * Return the round-robin private data of @team by reinterpreting the
 * team's generic mode_priv storage as a struct rr_priv.
 */
static struct rr_priv *rr_priv(struct team *team)
{
return (struct rr_priv *) &team->mode_priv;
}
/*
 * Find the first port with link up, starting the search at @port.
 *
 * @port itself is checked first, then the ports following it in the team's
 * port list, and finally the ports from the head of the list up to (but not
 * including) @port. Returns NULL when no port has its link up.
 *
 * @port must not be NULL; it is dereferenced unconditionally.
 */
static struct team_port *__get_first_port_up(struct team *team,
					     struct team_port *port)
{
	struct team_port *candidate = port;

	if (candidate->linkup)
		return candidate;

	/* Tail of the list: everything after the starting port. */
	list_for_each_entry_continue_rcu(candidate, &team->port_list, list) {
		if (candidate->linkup)
			return candidate;
	}

	/* Wrap around: from the list head until we are back at the start. */
	list_for_each_entry_rcu(candidate, &team->port_list, list) {
		if (candidate == port)
			return NULL;
		if (candidate->linkup)
			return candidate;
	}

	return NULL;
}
/*
 * Transmit callback of the round-robin mode.
 *
 * Picks a port index from the running packet counter modulo the number of
 * ports, then falls forward to the first port with link up starting from
 * that one, and queues @skb on its device.
 *
 * Returns true when dev_queue_xmit() reports success (0), false otherwise.
 * When there is no usable port the skb is freed and false is returned.
 */
static bool rr_transmit(struct team *team, struct sk_buff *skb)
{
	struct team_port *port;
	int port_count = team->port_count;
	int port_index;

	/* With no ports the modulo below would be a division by zero. */
	if (unlikely(port_count <= 0))
		goto drop;

	port_index = rr_priv(team)->sent_packets++ % port_count;
	port = team_get_port_by_index_rcu(team, port_index);
	/*
	 * The index lookup may find nothing; __get_first_port_up()
	 * dereferences its starting port, so bail out here.
	 */
	if (unlikely(!port))
		goto drop;

	port = __get_first_port_up(team, port);
	if (unlikely(!port))
		goto drop;

	skb->dev = port->dev;
	if (dev_queue_xmit(skb))
		return false;
	return true;

drop:
	dev_kfree_skb_any(skb);
	return false;
}
/*
 * Port-enter callback: delegates to team_port_set_team_mac() and returns
 * its result.
 * NOTE(review): per the Kconfig help this gives the new port the team's MAC
 * address - confirm against team_port_set_team_mac() in team.c.
 */
static int rr_port_enter(struct team *team, struct team_port *port)
{
return team_port_set_team_mac(port);
}
/*
 * Port MAC-change callback: calls team_port_set_team_mac() for @port.
 * The callback type returns void, so the helper's return value is dropped.
 */
static void rr_port_change_mac(struct team *team, struct team_port *port)
{
team_port_set_team_mac(port);
}
/*
 * Callback table of the round-robin mode. init, exit, receive and
 * port_leave are left unset; this mode does not provide them.
 */
static const struct team_mode_ops rr_mode_ops = {
.transmit = rr_transmit,
.port_enter = rr_port_enter,
.port_change_mac = rr_port_change_mac,
};
/*
 * Mode descriptor handed to team_mode_register(). "roundrobin" is the kind
 * string identifying this mode; priv_size announces how much of the team's
 * mode_priv storage the mode uses.
 */
static struct team_mode rr_mode = {
.kind = "roundrobin",
.owner = THIS_MODULE,
.priv_size = sizeof(struct rr_priv),
.ops = &rr_mode_ops,
};
/* Module entry point: register the round-robin mode; returns the result of team_mode_register(). */
static int __init rr_init_module(void)
{
return team_mode_register(&rr_mode);
}
/* Module exit point: unregister the round-robin mode (the return value of team_mode_unregister() is not checked). */
static void __exit rr_cleanup_module(void)
{
team_mode_unregister(&rr_mode);
}
/* Module hooks and metadata. */
module_init(rr_init_module);
module_exit(rr_cleanup_module);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Jiri Pirko <jpirko@redhat.com>");
MODULE_DESCRIPTION("Round-robin mode for team");
/* NOTE(review): presumably the alias the team core requests to autoload this mode - confirm in team.c */
MODULE_ALIAS("team-mode-roundrobin");

View File

@ -185,6 +185,7 @@ header-y += if_pppol2tp.h
header-y += if_pppox.h header-y += if_pppox.h
header-y += if_slip.h header-y += if_slip.h
header-y += if_strip.h header-y += if_strip.h
header-y += if_team.h
header-y += if_tr.h header-y += if_tr.h
header-y += if_tun.h header-y += if_tun.h
header-y += if_tunnel.h header-y += if_tunnel.h

View File

@ -79,6 +79,7 @@
#define IFF_TX_SKB_SHARING 0x10000 /* The interface supports sharing #define IFF_TX_SKB_SHARING 0x10000 /* The interface supports sharing
* skbs on transmit */ * skbs on transmit */
#define IFF_UNICAST_FLT 0x20000 /* Supports unicast filtering */ #define IFF_UNICAST_FLT 0x20000 /* Supports unicast filtering */
#define IFF_TEAM_PORT 0x40000 /* device used as team port */
#define IF_GET_IFACE 0x0001 /* for querying only */ #define IF_GET_IFACE 0x0001 /* for querying only */
#define IF_GET_PROTO 0x0002 #define IF_GET_PROTO 0x0002

242
include/linux/if_team.h Normal file
View File

@ -0,0 +1,242 @@
/*
* include/linux/if_team.h - Network team device driver header
* Copyright (c) 2011 Jiri Pirko <jpirko@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*/
#ifndef _LINUX_IF_TEAM_H_
#define _LINUX_IF_TEAM_H_
#ifdef __KERNEL__
/*
 * Traffic counters of a team device. struct team holds these as a __percpu
 * allocation, i.e. one instance per CPU.
 */
struct team_pcpu_stats {
u64 rx_packets;
u64 rx_bytes;
u64 rx_multicast;
u64 tx_packets;
u64 tx_bytes;
/* NOTE(review): presumably guards consistent reads of the u64 counters above - confirm in team.c */
struct u64_stats_sync syncp;
u32 rx_dropped;
u32 tx_dropped;
};
struct team;
/*
 * A network device enslaved to a team. Each port is linked both into the
 * team's index hash (hlist) and into the team's ordinary port list (list).
 */
struct team_port {
struct net_device *dev;
struct hlist_node hlist; /* node in hash list */
struct list_head list; /* node in ordinary list */
struct team *team;
/* Key used by team_get_port_by_index[_rcu]() to look the port up. */
int index;
/*
* A place for storing the original values of the device before it
* became a port.
*/
struct {
unsigned char dev_addr[MAX_ADDR_LEN];
unsigned int mtu;
} orig;
/* Link state; the round-robin mode only transmits on ports with linkup set. */
bool linkup;
u32 speed;
u8 duplex;
/* NOTE(review): presumably used to free the port after an RCU grace period - confirm in team.c */
struct rcu_head rcu;
};
/*
 * Callbacks a team mode may implement. The in-tree modes each fill in only
 * a subset, so individual members may be NULL in a mode's table.
 */
struct team_mode_ops {
/* Mode setup / teardown for a team. */
int (*init)(struct team *team);
void (*exit)(struct team *team);
/* Decide how a frame received on @port is handled. */
rx_handler_result_t (*receive)(struct team *team,
struct team_port *port,
struct sk_buff *skb);
/* Send @skb; the in-tree modes return true on success and free the skb themselves when they drop it. */
bool (*transmit)(struct team *team, struct sk_buff *skb);
/* Port lifecycle notifications. */
int (*port_enter)(struct team *team, struct team_port *port);
void (*port_leave)(struct team *team, struct team_port *port);
void (*port_change_mac)(struct team *team, struct team_port *port);
};
/* Data type carried by a team option's getter/setter argument. */
enum team_option_type {
TEAM_OPTION_TYPE_U32,
TEAM_OPTION_TYPE_STRING,
};
/*
 * One named, typed option of a team. Options are passed to
 * team_options_register()/team_options_unregister() as arrays.
 */
struct team_option {
/* NOTE(review): presumably links the option into team->option_list - confirm in team.c */
struct list_head list;
const char *name;
enum team_option_type type;
/* @arg points to storage matching @type (e.g. a u32 for TEAM_OPTION_TYPE_U32). */
int (*getter)(struct team *team, void *arg);
int (*setter)(struct team *team, void *arg);
};
/* Descriptor of a team mode, passed to team_mode_register()/team_mode_unregister(). */
struct team_mode {
struct list_head list;
/* Name identifying the mode, e.g. "roundrobin" or "activebackup". */
const char *kind;
/* Module providing the mode (the in-tree modes set THIS_MODULE). */
struct module *owner;
/* Size of the mode's private data kept in team->mode_priv. */
size_t priv_size;
const struct team_mode_ops *ops;
};
/*
 * Size of the port index hash. The entry count must stay a power of two
 * because team_port_index_hash() masks with (TEAM_PORT_HASHENTRIES - 1).
 */
#define TEAM_PORT_HASHBITS 4
#define TEAM_PORT_HASHENTRIES (1 << TEAM_PORT_HASHBITS)
/* Capacity of struct team's mode_priv area, in longs and in bytes. */
#define TEAM_MODE_PRIV_LONGS 4
#define TEAM_MODE_PRIV_SIZE (sizeof(long) * TEAM_MODE_PRIV_LONGS)
/* State of one team device. */
struct team {
struct net_device *dev; /* associated netdevice */
struct team_pcpu_stats __percpu *pcpu_stats;
spinlock_t lock; /* used for overall locking, e.g. port lists write */
/*
* port lists with port count
*/
int port_count;
struct hlist_head port_hlist[TEAM_PORT_HASHENTRIES];
struct list_head port_list;
struct list_head option_list;
/* Currently selected mode descriptor. */
const struct team_mode *mode;
/* Embedded by value (not a pointer), unlike mode->ops. */
struct team_mode_ops ops;
/* Opaque storage for the mode's private data; modes cast it to their own struct. */
long mode_priv[TEAM_MODE_PRIV_LONGS];
};
/*
 * Map @port_index to its bucket in the team's port hash. The bucket number
 * is the low TEAM_PORT_HASHBITS bits of the index.
 */
static inline struct hlist_head *team_port_index_hash(struct team *team,
						      int port_index)
{
	int bucket = port_index & (TEAM_PORT_HASHENTRIES - 1);

	return team->port_hlist + bucket;
}
static inline struct team_port *team_get_port_by_index(struct team *team,
int port_index)
{
struct hlist_node *p;
struct team_port *port;
struct hlist_head *head = team_port_index_hash(team, port_index);
hlist_for_each_entry(port, p, head, hlist)
if (port->index == port_index)
return port;
return NULL;
}
static inline struct team_port *team_get_port_by_index_rcu(struct team *team,
int port_index)
{
struct hlist_node *p;
struct team_port *port;
struct hlist_head *head = team_port_index_hash(team, port_index);
hlist_for_each_entry_rcu(port, p, head, hlist)
if (port->index == port_index)
return port;
return NULL;
}
/*
 * Interface offered to mode modules; the definitions are not in this header.
 */
/* NOTE(review): presumably sets the port device's MAC address to the team's - confirm in team.c */
extern int team_port_set_team_mac(struct team_port *port);
/* Add / remove an array of @option_count options to / from @team. */
extern void team_options_register(struct team *team,
struct team_option *option,
size_t option_count);
extern void team_options_unregister(struct team *team,
struct team_option *option,
size_t option_count);
/* Make a mode available to / withdraw it from the team driver. */
extern int team_mode_register(struct team_mode *mode);
extern int team_mode_unregister(struct team_mode *mode);
#endif /* __KERNEL__ */
/* NOTE(review): presumably the maximum length of string-typed values exchanged over netlink - confirm in team.c */
#define TEAM_STRING_MAX_LEN 32
/**********************************
* NETLINK_GENERIC netlink family.
**********************************/
/* Commands of the team generic netlink family. TEAM_CMD_MAX is the highest valid command. */
enum {
TEAM_CMD_NOOP,
TEAM_CMD_OPTIONS_SET,
TEAM_CMD_OPTIONS_GET,
TEAM_CMD_PORT_LIST_GET,
__TEAM_CMD_MAX,
TEAM_CMD_MAX = (__TEAM_CMD_MAX - 1),
};
/* Top-level attributes of a team netlink message. TEAM_ATTR_MAX is the highest valid attribute. */
enum {
TEAM_ATTR_UNSPEC,
TEAM_ATTR_TEAM_IFINDEX, /* u32 */
TEAM_ATTR_LIST_OPTION, /* nest */
TEAM_ATTR_LIST_PORT, /* nest */
__TEAM_ATTR_MAX,
TEAM_ATTR_MAX = __TEAM_ATTR_MAX - 1,
};
/* Nested layout of get/set msg:
*
* [TEAM_ATTR_LIST_OPTION]
* [TEAM_ATTR_ITEM_OPTION]
* [TEAM_ATTR_OPTION_*], ...
* [TEAM_ATTR_ITEM_OPTION]
* [TEAM_ATTR_OPTION_*], ...
* ...
* [TEAM_ATTR_LIST_PORT]
* [TEAM_ATTR_ITEM_PORT]
* [TEAM_ATTR_PORT_*], ...
* [TEAM_ATTR_ITEM_PORT]
* [TEAM_ATTR_PORT_*], ...
* ...
*/
/* Attributes nested inside TEAM_ATTR_LIST_OPTION: one TEAM_ATTR_ITEM_OPTION nest per option. */
enum {
TEAM_ATTR_ITEM_OPTION_UNSPEC,
TEAM_ATTR_ITEM_OPTION, /* nest */
__TEAM_ATTR_ITEM_OPTION_MAX,
TEAM_ATTR_ITEM_OPTION_MAX = __TEAM_ATTR_ITEM_OPTION_MAX - 1,
};
/* Attributes nested inside a TEAM_ATTR_ITEM_OPTION nest, describing a single option. */
enum {
TEAM_ATTR_OPTION_UNSPEC,
TEAM_ATTR_OPTION_NAME, /* string */
TEAM_ATTR_OPTION_CHANGED, /* flag */
TEAM_ATTR_OPTION_TYPE, /* u8 */
TEAM_ATTR_OPTION_DATA, /* dynamic */
__TEAM_ATTR_OPTION_MAX,
TEAM_ATTR_OPTION_MAX = __TEAM_ATTR_OPTION_MAX - 1,
};
/* Attributes nested inside TEAM_ATTR_LIST_PORT: one TEAM_ATTR_ITEM_PORT nest per port. */
enum {
TEAM_ATTR_ITEM_PORT_UNSPEC,
TEAM_ATTR_ITEM_PORT, /* nest */
__TEAM_ATTR_ITEM_PORT_MAX,
TEAM_ATTR_ITEM_PORT_MAX = __TEAM_ATTR_ITEM_PORT_MAX - 1,
};
/* Attributes nested inside a TEAM_ATTR_ITEM_PORT nest, describing a single port. */
enum {
TEAM_ATTR_PORT_UNSPEC,
TEAM_ATTR_PORT_IFINDEX, /* u32 */
TEAM_ATTR_PORT_CHANGED, /* flag */
TEAM_ATTR_PORT_LINKUP, /* flag */
TEAM_ATTR_PORT_SPEED, /* u32 */
TEAM_ATTR_PORT_DUPLEX, /* u8 */
__TEAM_ATTR_PORT_MAX,
TEAM_ATTR_PORT_MAX = __TEAM_ATTR_PORT_MAX - 1,
};
/*
* NETLINK_GENERIC related info
*/
/* Family name and version of the team generic netlink family. */
#define TEAM_GENL_NAME "team"
#define TEAM_GENL_VERSION 0x1
/* Name of the multicast group for change events. */
#define TEAM_GENL_CHANGE_EVENT_MC_GRP_NAME "change_event"
#endif /* _LINUX_IF_TEAM_H_ */