Merge branch 'sock-diag-destroy'

Lorenzo Colitti says:

====================
Support administratively closing application sockets

This patchset adds the ability to administratively close a socket
without any action from the process owning the socket or the
socket protocol.

It implements this by adding a new diag_destroy function pointer
to struct proto. In-kernel callers can access this functionality
directly by calling sk->sk_prot->diag_destroy(sk, err).

It also exposes this functionality to userspace via a new
SOCK_DESTROY operation in the NETLINK_SOCK_DIAG sockets. This
allows a privileged userspace process, such as a connection
manager or system administration tool, to close sockets belonging
to other apps when the network they were established on has
disconnected. It is needed on laptops and mobile hosts to ensure
that network switches / disconnects do not result in applications
being blocked for long periods of time (minutes) in read or
connect calls on TCP sockets that will never succeed because the
IP address they are bound to is no longer on the system. Closing
the sockets causes these calls to fail fast and allows the apps
to reconnect on another network.

Userspace intervention is necessary because in many cases the
kernel does not have enough information to know that a connection
is now inoperable. The kernel can know if a packet can't be
routed, but in general it won't know if a TCP connection is stuck
because it is now routed to a network where its source address is
no longer valid [5][6].

Many other operating systems offer similar functionality:

 - FreeBSD has had this since 5.4 in 2005 [2]. It is available
   to privileged userspace and there is a tool to use it [3].
 - The FreeBSD commit description states that the idea came
   from OpenBSD.
 - iOS has been administratively closing app sockets since
   iOS 4 - see [4], which states that a socket "might get
   reclaimed by the kernel" and after that will return EBADF].
   For many years Android kernels have supported this via an
   out-of-tree SIOCKILLADDR ioctl that is called on every
   RTM_DELADDR event, but this solution is cleaner, more robust
   and more flexible: the connection manager can iterate over all
   connections on the deleted IP address and close all of them.
   It can also be used to close all sockets opened by a given app
   process, for example if the user has restricted that app from
   using the network, if a secure network such as a VPN has
   connected and security policy requires all of an application's
   connections to be routed via the VPN, etc.
 - For many years Android kernels have supported an out-of-tree
   SIOCKILLADDR ioctl that is called when a network disconnects
   or an RTM_DELADDR event is received. This solution is cleaner,
   more robust and more flexible. The connection manager can
   implement SIOCKILLADDR by iterating over all connections on
   the deleted IP address and close all of them, but it can also
   close all sockets opened by a given app process (for example
   if the user has restricted that app from), close all of a
   user's TCP connections if a user has connected a secure
   network such as a VPN and expects all of an application's
   connections to be routed via the VPN, etc.

Alternative schemes such as TCP keepalives in combination with
"iptables -j REJECT --reject-with tcp-reset", could be used to
achieve similar results, but on mobile devices TCP keepalives are
very expensive, and in such a scheme detecting stuck connections
has to wait for a keepalive to be sent or the application to
perform a write. An explicit notification from userspace is
cheaper and faster in the common case where an application is
blocked on read.

SOCK_DESTROY is placed behind an INET_DIAG_DESTROY configuration
option, which is currently off by default.

The TCP implementation of diag_destroy causes a TCP ABORT as
specified by RFC 793 [1]: immediately send a RST and clear local
connection state. This is what happens today if an application
enables SO_LINGER with a timeout of 0 and then calls close.

The first versions of the patchset did not send a RST, but that
is not graceful/correct TCP behaviour. tcp_abort now does a
proper RFC 793 ABORT and sends a RST to the peer. This is
consistent with BSD's tcpdrop, and is more correct in general,
even though in many use cases tcp_abort will only be called when
sending a RST is no longer possible (e.g., the network has
disconnected).

The original patchset also behaved like SIOCKILADDR and closed
TCP sockets with ETIMEDOUT. Tom Herbert pointed out that it would
be better if applications could distinguish between a timeout and
an administrative close. ECONNABORTED was chosen because it is
consistent with BSD.

[1] http://tools.ietf.org/html/rfc793#page-50
[2] http://svnweb.freebsd.org/base?view=revision&revision=141381
[3] https://www.freebsd.org/cgi/man.cgi?query=tcpdrop&sektion=8&manpath=FreeBSD+5.4-RELEASE
[4] https://developer.apple.com/library/ios/technotes/tn2277/_index.html#//apple_ref/doc/uid/DTS40010841-CH1-SUBSECTION3
[5] http://www.spinics.net/lists/netdev/msg352775.html
[6] http://www.spinics.net/lists/netdev/msg352952.html
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2015-12-15 23:26:52 -05:00
commit 5cfe6d8adb
12 changed files with 143 additions and 26 deletions

View File

@ -3,6 +3,7 @@
#include <uapi/linux/inet_diag.h>
struct net;
struct sock;
struct inet_hashinfo;
struct nlattr;
@ -23,6 +24,10 @@ struct inet_diag_handler {
void (*idiag_get_info)(struct sock *sk,
struct inet_diag_msg *r,
void *info);
int (*destroy)(struct sk_buff *in_skb,
const struct inet_diag_req_v2 *req);
__u16 idiag_type;
__u16 idiag_info_size;
};
@ -41,6 +46,10 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
struct sk_buff *in_skb, const struct nlmsghdr *nlh,
const struct inet_diag_req_v2 *req);
struct sock *inet_diag_find_one_icsk(struct net *net,
struct inet_hashinfo *hashinfo,
const struct inet_diag_req_v2 *req);
int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk);
extern int inet_diag_register(const struct inet_diag_handler *handler);

View File

@ -15,6 +15,7 @@ struct sock_diag_handler {
__u8 family;
int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh);
int (*get_info)(struct sk_buff *skb, struct sock *sk);
int (*destroy)(struct sk_buff *skb, struct nlmsghdr *nlh);
};
int sock_diag_register(const struct sock_diag_handler *h);
@ -68,4 +69,5 @@ bool sock_diag_has_destroy_listeners(const struct sock *sk)
}
void sock_diag_broadcast_destroy(struct sock *sk);
int sock_diag_destroy(struct sock *sk, int err);
#endif

View File

@ -1060,6 +1060,7 @@ struct proto {
void (*destroy_cgroup)(struct mem_cgroup *memcg);
struct cg_proto *(*proto_cgroup)(struct mem_cgroup *memcg);
#endif
int (*diag_destroy)(struct sock *sk, int err);
};
int proto_register(struct proto *prot, int alloc_slab);

View File

@ -1170,6 +1170,8 @@ void tcp_set_state(struct sock *sk, int state);
void tcp_done(struct sock *sk);
int tcp_abort(struct sock *sk, int err);
static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
{
rx_opt->dsack = 0;

View File

@ -4,6 +4,7 @@
#include <linux/types.h>
#define SOCK_DIAG_BY_FAMILY 20
#define SOCK_DESTROY 21
struct sock_diag_req {
__u8 sdiag_family;

View File

@ -214,7 +214,7 @@ void sock_diag_unregister(const struct sock_diag_handler *hnld)
}
EXPORT_SYMBOL_GPL(sock_diag_unregister);
static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh)
{
int err;
struct sock_diag_req *req = nlmsg_data(nlh);
@ -234,8 +234,12 @@ static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
hndl = sock_diag_handlers[req->sdiag_family];
if (hndl == NULL)
err = -ENOENT;
else
else if (nlh->nlmsg_type == SOCK_DIAG_BY_FAMILY)
err = hndl->dump(skb, nlh);
else if (nlh->nlmsg_type == SOCK_DESTROY && hndl->destroy)
err = hndl->destroy(skb, nlh);
else
err = -EOPNOTSUPP;
mutex_unlock(&sock_diag_table_mutex);
return err;
@ -261,7 +265,8 @@ static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
return ret;
case SOCK_DIAG_BY_FAMILY:
return __sock_diag_rcv_msg(skb, nlh);
case SOCK_DESTROY:
return __sock_diag_cmd(skb, nlh);
default:
return -EINVAL;
}
@ -295,6 +300,18 @@ static int sock_diag_bind(struct net *net, int group)
return 0;
}
int sock_diag_destroy(struct sock *sk, int err)
{
if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
if (!sk->sk_prot->diag_destroy)
return -EOPNOTSUPP;
return sk->sk_prot->diag_destroy(sk, err);
}
EXPORT_SYMBOL_GPL(sock_diag_destroy);
static int __net_init diag_net_init(struct net *net)
{
struct netlink_kernel_cfg cfg = {

View File

@ -436,6 +436,19 @@ config INET_UDP_DIAG
Support for UDP socket monitoring interface used by the ss tool.
If unsure, say Y.
config INET_DIAG_DESTROY
bool "INET: allow privileged process to administratively close sockets"
depends on INET_DIAG
default n
---help---
Provides a SOCK_DESTROY operation that allows privileged processes
(e.g., a connection manager or a network administration tool such as
ss) to close sockets opened by other processes. Closing a socket in
this way interrupts any blocking read/write/connect operations on
the socket and causes future socket calls to behave as if the socket
had been disconnected.
If unsure, say N.
menuconfig TCP_CONG_ADVANCED
bool "TCP: advanced congestion control"
---help---

View File

@ -350,17 +350,12 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
nlmsg_flags, unlh);
}
int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
struct sk_buff *in_skb,
const struct nlmsghdr *nlh,
const struct inet_diag_req_v2 *req)
struct sock *inet_diag_find_one_icsk(struct net *net,
struct inet_hashinfo *hashinfo,
const struct inet_diag_req_v2 *req)
{
struct net *net = sock_net(in_skb->sk);
struct sk_buff *rep;
struct sock *sk;
int err;
err = -EINVAL;
if (req->sdiag_family == AF_INET)
sk = inet_lookup(net, hashinfo, req->id.idiag_dst[0],
req->id.idiag_dport, req->id.idiag_src[0],
@ -375,15 +370,33 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
req->id.idiag_if);
#endif
else
goto out_nosk;
return ERR_PTR(-EINVAL);
err = -ENOENT;
if (!sk)
goto out_nosk;
return ERR_PTR(-ENOENT);
err = sock_diag_check_cookie(sk, req->id.idiag_cookie);
if (err)
goto out;
if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) {
sock_gen_put(sk);
return ERR_PTR(-ENOENT);
}
return sk;
}
EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk);
int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
struct sk_buff *in_skb,
const struct nlmsghdr *nlh,
const struct inet_diag_req_v2 *req)
{
struct net *net = sock_net(in_skb->sk);
struct sk_buff *rep;
struct sock *sk;
int err;
sk = inet_diag_find_one_icsk(net, hashinfo, req);
if (IS_ERR(sk))
return PTR_ERR(sk);
rep = nlmsg_new(inet_sk_attr_size(), GFP_KERNEL);
if (!rep) {
@ -409,12 +422,11 @@ out:
if (sk)
sock_gen_put(sk);
out_nosk:
return err;
}
EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk);
static int inet_diag_get_exact(struct sk_buff *in_skb,
static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb,
const struct nlmsghdr *nlh,
const struct inet_diag_req_v2 *req)
{
@ -424,8 +436,12 @@ static int inet_diag_get_exact(struct sk_buff *in_skb,
handler = inet_diag_lock_handler(req->sdiag_protocol);
if (IS_ERR(handler))
err = PTR_ERR(handler);
else
else if (cmd == SOCK_DIAG_BY_FAMILY)
err = handler->dump_one(in_skb, nlh, req);
else if (cmd == SOCK_DESTROY && handler->destroy)
err = handler->destroy(in_skb, req);
else
err = -EOPNOTSUPP;
inet_diag_unlock_handler(handler);
return err;
@ -938,7 +954,7 @@ static int inet_diag_get_exact_compat(struct sk_buff *in_skb,
req.idiag_states = rc->idiag_states;
req.id = rc->id;
return inet_diag_get_exact(in_skb, nlh, &req);
return inet_diag_cmd_exact(SOCK_DIAG_BY_FAMILY, in_skb, nlh, &req);
}
static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
@ -972,7 +988,7 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
return inet_diag_get_exact_compat(skb, nlh);
}
static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h)
{
int hdrlen = sizeof(struct inet_diag_req_v2);
struct net *net = sock_net(skb->sk);
@ -980,7 +996,8 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
if (nlmsg_len(h) < hdrlen)
return -EINVAL;
if (h->nlmsg_flags & NLM_F_DUMP) {
if (h->nlmsg_type == SOCK_DIAG_BY_FAMILY &&
h->nlmsg_flags & NLM_F_DUMP) {
if (nlmsg_attrlen(h, hdrlen)) {
struct nlattr *attr;
@ -999,7 +1016,7 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
}
}
return inet_diag_get_exact(skb, h, nlmsg_data(h));
return inet_diag_cmd_exact(h->nlmsg_type, skb, h, nlmsg_data(h));
}
static
@ -1050,14 +1067,16 @@ int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk)
static const struct sock_diag_handler inet_diag_handler = {
.family = AF_INET,
.dump = inet_diag_handler_dump,
.dump = inet_diag_handler_cmd,
.get_info = inet_diag_handler_get_info,
.destroy = inet_diag_handler_cmd,
};
static const struct sock_diag_handler inet6_diag_handler = {
.family = AF_INET6,
.dump = inet_diag_handler_dump,
.dump = inet_diag_handler_cmd,
.get_info = inet_diag_handler_get_info,
.destroy = inet_diag_handler_cmd,
};
int inet_diag_register(const struct inet_diag_handler *h)

View File

@ -3080,6 +3080,38 @@ void tcp_done(struct sock *sk)
}
EXPORT_SYMBOL_GPL(tcp_done);
int tcp_abort(struct sock *sk, int err)
{
if (!sk_fullsock(sk)) {
sock_gen_put(sk);
return -EOPNOTSUPP;
}
/* Don't race with userspace socket closes such as tcp_close. */
lock_sock(sk);
/* Don't race with BH socket closes such as inet_csk_listen_stop. */
local_bh_disable();
bh_lock_sock(sk);
if (!sock_flag(sk, SOCK_DEAD)) {
sk->sk_err = err;
/* This barrier is coupled with smp_rmb() in tcp_poll() */
smp_wmb();
sk->sk_error_report(sk);
if (tcp_need_reset(sk->sk_state))
tcp_send_active_reset(sk, GFP_ATOMIC);
tcp_done(sk);
}
bh_unlock_sock(sk);
local_bh_enable();
release_sock(sk);
sock_put(sk);
return 0;
}
EXPORT_SYMBOL_GPL(tcp_abort);
extern struct tcp_congestion_ops tcp_reno;
static __initdata unsigned long thash_entries;

View File

@ -10,6 +10,8 @@
*/
#include <linux/module.h>
#include <linux/net.h>
#include <linux/sock_diag.h>
#include <linux/inet_diag.h>
#include <linux/tcp.h>
@ -46,12 +48,29 @@ static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req);
}
#ifdef CONFIG_INET_DIAG_DESTROY
static int tcp_diag_destroy(struct sk_buff *in_skb,
const struct inet_diag_req_v2 *req)
{
struct net *net = sock_net(in_skb->sk);
struct sock *sk = inet_diag_find_one_icsk(net, &tcp_hashinfo, req);
if (IS_ERR(sk))
return PTR_ERR(sk);
return sock_diag_destroy(sk, ECONNABORTED);
}
#endif
static const struct inet_diag_handler tcp_diag_handler = {
.dump = tcp_diag_dump,
.dump_one = tcp_diag_dump_one,
.idiag_get_info = tcp_diag_get_info,
.idiag_type = IPPROTO_TCP,
.idiag_info_size = sizeof(struct tcp_info),
#ifdef CONFIG_INET_DIAG_DESTROY
.destroy = tcp_diag_destroy,
#endif
};
static int __init tcp_diag_init(void)

View File

@ -2342,6 +2342,7 @@ struct proto tcp_prot = {
.destroy_cgroup = tcp_destroy_cgroup,
.proto_cgroup = tcp_proto_cgroup,
#endif
.diag_destroy = tcp_abort,
};
EXPORT_SYMBOL(tcp_prot);

View File

@ -1890,6 +1890,7 @@ struct proto tcpv6_prot = {
.proto_cgroup = tcp_proto_cgroup,
#endif
.clear_sk = tcp_v6_clear_sk,
.diag_destroy = tcp_abort,
};
static const struct inet6_protocol tcpv6_protocol = {