syzkaller/executor/common_linux.h

2263 lines
67 KiB
C
Raw Normal View History

2017-09-22 09:09:53 +00:00
// Copyright 2016 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
// This file is shared between executor and csource package.
#include <stdlib.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
2017-09-22 09:09:53 +00:00
#if SYZ_EXECUTOR
struct cover_t;
static void cover_reset(cover_t* cov);
2017-09-22 09:09:53 +00:00
#endif
#if SYZ_EXECUTOR || SYZ_THREADED
#include <linux/futex.h>
#include <pthread.h>
typedef struct {
int state;
} event_t;
static void event_init(event_t* ev)
{
ev->state = 0;
}
static void event_reset(event_t* ev)
2017-09-22 09:09:53 +00:00
{
ev->state = 0;
2017-09-22 09:09:53 +00:00
}
static void event_set(event_t* ev)
{
if (ev->state)
fail("event already set");
__atomic_store_n(&ev->state, 1, __ATOMIC_RELEASE);
syscall(SYS_futex, &ev->state, FUTEX_WAKE | FUTEX_PRIVATE_FLAG);
}
static void event_wait(event_t* ev)
{
while (!__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE))
syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, 0);
}
static int event_isset(event_t* ev)
{
return __atomic_load_n(&ev->state, __ATOMIC_ACQUIRE);
}
2017-09-22 09:09:53 +00:00
static int event_timedwait(event_t* ev, uint64 timeout)
2017-09-22 09:09:53 +00:00
{
uint64 start = current_time_ms();
uint64 now = start;
for (;;) {
uint64 remain = timeout - (now - start);
struct timespec ts;
ts.tv_sec = remain / 1000;
ts.tv_nsec = (remain % 1000) * 1000 * 1000;
syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, &ts);
if (__atomic_load_n(&ev->state, __ATOMIC_RELAXED))
return 1;
now = current_time_ms();
if (now - start > timeout)
return 0;
}
2017-09-22 09:09:53 +00:00
}
#endif
#if SYZ_EXECUTOR || SYZ_TUN_ENABLE || SYZ_ENABLE_NETDEV
#include <stdarg.h>
#include <stdbool.h>
#include <string.h>
2017-09-22 09:09:53 +00:00
static void vsnprintf_check(char* str, size_t size, const char* format, va_list args)
{
int rv;
rv = vsnprintf(str, size, format, args);
if (rv < 0)
fail("tun: snprintf failed");
if ((size_t)rv >= size)
fail("tun: string '%s...' doesn't fit into buffer", str);
}
#define COMMAND_MAX_LEN 128
#define PATH_PREFIX "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin "
#define PATH_PREFIX_LEN (sizeof(PATH_PREFIX) - 1)
2017-09-22 09:09:53 +00:00
static void execute_command(bool panic, const char* format, ...)
2017-09-22 09:09:53 +00:00
{
va_list args;
char command[PATH_PREFIX_LEN + COMMAND_MAX_LEN];
2017-09-22 09:09:53 +00:00
int rv;
va_start(args, format);
// Executor process does not have any env, including PATH.
// On some distributions, system/shell adds a minimal PATH, on some it does not.
// Set own standard PATH to make it work across distributions.
memcpy(command, PATH_PREFIX, PATH_PREFIX_LEN);
vsnprintf_check(command + PATH_PREFIX_LEN, COMMAND_MAX_LEN, format, args);
2017-09-22 09:09:53 +00:00
va_end(args);
2018-04-06 15:25:54 +00:00
rv = system(command);
if (rv) {
if (panic)
fail("command '%s' failed: %d", &command[0], rv);
debug("command '%s': %d\n", &command[0], rv);
}
2017-09-22 09:09:53 +00:00
}
#endif
2017-09-22 09:09:53 +00:00
#if SYZ_EXECUTOR || SYZ_TUN_ENABLE
#include <arpa/inet.h>
#include <errno.h>
#include <fcntl.h>
#include <linux/if.h>
#include <linux/if_ether.h>
#include <linux/if_tun.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <net/if_arp.h>
#include <stdarg.h>
#include <stdbool.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
2017-09-22 09:09:53 +00:00
static int tunfd = -1;
static int tun_frags_enabled;
2017-09-22 09:09:53 +00:00
// We just need this to be large enough to hold headers that we parse (ethernet/ip/tcp).
// Rest of the packet (if any) will be silently truncated which is fine.
#define SYZ_TUN_MAX_PACKET_SIZE 1000
#define TUN_IFACE "syz_tun"
2017-09-22 09:09:53 +00:00
#define LOCAL_MAC "aa:aa:aa:aa:aa:aa"
#define REMOTE_MAC "aa:aa:aa:aa:aa:bb"
2017-09-22 09:09:53 +00:00
#define LOCAL_IPV4 "172.20.20.170"
#define REMOTE_IPV4 "172.20.20.187"
2017-09-22 09:09:53 +00:00
#define LOCAL_IPV6 "fe80::aa"
#define REMOTE_IPV6 "fe80::bb"
2017-09-22 09:09:53 +00:00
#ifndef IFF_NAPI
#define IFF_NAPI 0x0010
#endif
#ifndef IFF_NAPI_FRAGS
#define IFF_NAPI_FRAGS 0x0020
#endif
static void initialize_tun(void)
{
#if SYZ_EXECUTOR
if (!flag_enable_tun)
return;
#endif
2017-09-22 09:09:53 +00:00
tunfd = open("/dev/net/tun", O_RDWR | O_NONBLOCK);
if (tunfd == -1) {
#if SYZ_EXECUTOR
fail("tun: can't open /dev/net/tun\n");
#else
printf("tun: can't open /dev/net/tun: please enable CONFIG_TUN=y\n");
printf("otherwise fuzzing or reproducing might not work as intended\n");
return;
#endif
}
// Remap tun onto higher fd number to hide it from fuzzer and to keep
// fd numbers stable regardless of whether tun is opened or not (also see kMaxFd).
const int kTunFd = 240;
if (dup2(tunfd, kTunFd) < 0)
fail("dup2(tunfd, kTunFd) failed");
close(tunfd);
tunfd = kTunFd;
2017-09-22 09:09:53 +00:00
struct ifreq ifr;
memset(&ifr, 0, sizeof(ifr));
strncpy(ifr.ifr_name, TUN_IFACE, IFNAMSIZ);
ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_NAPI | IFF_NAPI_FRAGS;
if (ioctl(tunfd, TUNSETIFF, (void*)&ifr) < 0) {
// IFF_NAPI_FRAGS requires root, so try without it.
ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
if (ioctl(tunfd, TUNSETIFF, (void*)&ifr) < 0)
fail("tun: ioctl(TUNSETIFF) failed");
}
// If IFF_NAPI_FRAGS is not supported it will be silently dropped,
// so query the effective flags.
if (ioctl(tunfd, TUNGETIFF, (void*)&ifr) < 0)
fail("tun: ioctl(TUNGETIFF) failed");
tun_frags_enabled = (ifr.ifr_flags & IFF_NAPI_FRAGS) != 0;
debug("tun_frags_enabled=%d\n", tun_frags_enabled);
2017-09-22 09:09:53 +00:00
// Disable IPv6 DAD, otherwise the address remains unusable until DAD completes.
// Don't panic because this is an optional config.
execute_command(0, "sysctl -w net.ipv6.conf.%s.accept_dad=0", TUN_IFACE);
2017-09-22 09:09:53 +00:00
// Disable IPv6 router solicitation to prevent IPv6 spam.
// Don't panic because this is an optional config.
execute_command(0, "sysctl -w net.ipv6.conf.%s.router_solicitations=0", TUN_IFACE);
2017-09-22 09:09:53 +00:00
// There seems to be no way to disable IPv6 MTD to prevent more IPv6 spam.
execute_command(1, "ip link set dev %s address %s", TUN_IFACE, LOCAL_MAC);
execute_command(1, "ip addr add %s/24 dev %s", LOCAL_IPV4, TUN_IFACE);
execute_command(1, "ip neigh add %s lladdr %s dev %s nud permanent",
REMOTE_IPV4, REMOTE_MAC, TUN_IFACE);
// Don't panic because ipv6 may be not enabled in kernel.
execute_command(0, "ip -6 addr add %s/120 dev %s", LOCAL_IPV6, TUN_IFACE);
execute_command(0, "ip -6 neigh add %s lladdr %s dev %s nud permanent",
REMOTE_IPV6, REMOTE_MAC, TUN_IFACE);
execute_command(1, "ip link set dev %s up", TUN_IFACE);
}
#endif
#if SYZ_EXECUTOR || SYZ_ENABLE_NETDEV
#include <arpa/inet.h>
#include <errno.h>
#include <fcntl.h>
#include <linux/if.h>
#include <linux/if_ether.h>
#include <linux/if_tun.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <net/if_arp.h>
#include <stdarg.h>
#include <stdbool.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/uio.h>
// Addresses are chosen to be in the same subnet as tun addresses.
#define DEV_IPV4 "172.20.20.%d"
#define DEV_IPV6 "fe80::%02hx"
#define DEV_MAC "aa:aa:aa:aa:aa:%02hx"
static void snprintf_check(char* str, size_t size, const char* format, ...)
{
va_list args;
va_start(args, format);
vsnprintf_check(str, size, format, args);
va_end(args);
}
// We test in a separate namespace, which does not have any network devices initially (even lo).
// Create/up as many as we can.
static void initialize_netdevices(void)
{
#if SYZ_EXECUTOR
if (!flag_enable_net_dev)
return;
#endif
unsigned i;
const char* devtypes[] = {"ip6gretap", "bridge", "vcan", "bond", "team"};
// If you extend this array, also update netdev_addr_id in vnet.txt.
const char* devnames[] = {"lo", "sit0", "bridge0", "vcan0", "tunl0",
"gre0", "gretap0", "ip_vti0", "ip6_vti0",
"ip6tnl0", "ip6gre0", "ip6gretap0",
"erspan0", "bond0", "veth0", "veth1", "team0",
"veth0_to_bridge", "veth1_to_bridge",
"veth0_to_bond", "veth1_to_bond",
"veth0_to_team", "veth1_to_team"};
const char* devmasters[] = {"bridge", "bond", "team"};
for (i = 0; i < sizeof(devtypes) / (sizeof(devtypes[0])); i++)
execute_command(0, "ip link add dev %s0 type %s", devtypes[i], devtypes[i]);
// This adds connected veth0 and veth1 devices.
execute_command(0, "ip link add type veth");
// This creates connected bridge/bond/team_slave devices of type veth,
// and makes them slaves of bridge/bond/team devices, respectively.
// Note: slave devices don't need MAC/IP addresses, only master devices.
// veth0_to_* is not slave devices, which still need ip addresses.
for (i = 0; i < sizeof(devmasters) / (sizeof(devmasters[0])); i++) {
execute_command(0, "ip link add name %s_slave_0 type veth peer name veth0_to_%s", devmasters[i], devmasters[i]);
execute_command(0, "ip link add name %s_slave_1 type veth peer name veth1_to_%s", devmasters[i], devmasters[i]);
execute_command(0, "ip link set %s_slave_0 master %s0", devmasters[i], devmasters[i]);
execute_command(0, "ip link set %s_slave_1 master %s0", devmasters[i], devmasters[i]);
execute_command(0, "ip link set veth0_to_%s up", devmasters[i]);
execute_command(0, "ip link set veth1_to_%s up", devmasters[i]);
}
// bond/team_slave_* will set up automatically when set their master.
// But bridge_slave_* need to set up manually.
execute_command(0, "ip link set bridge_slave_0 up");
execute_command(0, "ip link set bridge_slave_1 up");
for (i = 0; i < sizeof(devnames) / (sizeof(devnames[0])); i++) {
char addr[32];
// Assign some unique address to devices. Some devices won't up without this.
// Devices that don't need these addresses will simply ignore them.
// Shift addresses by 10 because 0 subnet address can mean special things.
snprintf_check(addr, sizeof(addr), DEV_IPV4, i + 10);
execute_command(0, "ip -4 addr add %s/24 dev %s", addr, devnames[i]);
snprintf_check(addr, sizeof(addr), DEV_IPV6, i + 10);
execute_command(0, "ip -6 addr add %s/120 dev %s", addr, devnames[i]);
snprintf_check(addr, sizeof(addr), DEV_MAC, i + 10);
execute_command(0, "ip link set dev %s address %s", devnames[i], addr);
execute_command(0, "ip link set dev %s up", devnames[i]);
}
2017-09-22 09:09:53 +00:00
}
#endif
#if SYZ_EXECUTOR || SYZ_TUN_ENABLE && (__NR_syz_extract_tcp_res || SYZ_REPEAT)
#include <errno.h>
2017-09-22 09:09:53 +00:00
static int read_tun(char* data, int size)
{
if (tunfd < 0)
return -1;
2017-09-22 09:09:53 +00:00
int rv = read(tunfd, data, size);
if (rv < 0) {
if (errno == EAGAIN)
return -1;
// Tun sometimes returns this, unclear if it's a kernel bug or not.
if (errno == EBADFD)
return -1;
fail("tun: read failed with %d", rv);
2017-09-22 09:09:53 +00:00
}
return rv;
}
#endif
#if SYZ_EXECUTOR || __NR_syz_emit_ethernet && SYZ_TUN_ENABLE
#include <stdbool.h>
#include <sys/uio.h>
2017-09-22 09:09:53 +00:00
#define MAX_FRAGS 4
struct vnet_fragmentation {
uint32 full;
uint32 count;
uint32 frags[MAX_FRAGS];
};
2017-09-22 09:09:53 +00:00
static long syz_emit_ethernet(long a0, long a1, long a2)
{
// syz_emit_ethernet(len len[packet], packet ptr[in, eth_packet], frags ptr[in, vnet_fragmentation, opt])
// vnet_fragmentation {
// full int32[0:1]
// count int32[1:4]
// frags array[int32[0:4096], 4]
// }
2017-09-22 09:09:53 +00:00
if (tunfd < 0)
return (uintptr_t)-1;
uint32 length = a0;
2017-09-22 09:09:53 +00:00
char* data = (char*)a1;
debug_dump_data(data, length);
struct vnet_fragmentation* frags = (struct vnet_fragmentation*)a2;
struct iovec vecs[MAX_FRAGS + 1];
uint32 nfrags = 0;
if (!tun_frags_enabled || frags == NULL) {
vecs[nfrags].iov_base = data;
vecs[nfrags].iov_len = length;
nfrags++;
} else {
bool full = true;
uint32 i, count = 0;
NONFAILING(full = frags->full);
NONFAILING(count = frags->count);
if (count > MAX_FRAGS)
count = MAX_FRAGS;
for (i = 0; i < count && length != 0; i++) {
uint32 size = 0;
NONFAILING(size = frags->frags[i]);
if (size > length)
size = length;
vecs[nfrags].iov_base = data;
vecs[nfrags].iov_len = size;
nfrags++;
data += size;
length -= size;
}
if (length != 0 && (full || nfrags == 0)) {
vecs[nfrags].iov_base = data;
vecs[nfrags].iov_len = length;
nfrags++;
}
}
return writev(tunfd, vecs, nfrags);
2017-09-22 09:09:53 +00:00
}
#endif
#if SYZ_EXECUTOR || SYZ_REPEAT && SYZ_TUN_ENABLE
2017-09-22 09:09:53 +00:00
static void flush_tun()
{
#if SYZ_EXECUTOR
if (!flag_enable_tun)
return;
#endif
2017-09-22 09:09:53 +00:00
char data[SYZ_TUN_MAX_PACKET_SIZE];
while (read_tun(&data[0], sizeof(data)) != -1) {
}
2017-09-22 09:09:53 +00:00
}
#endif
#if SYZ_EXECUTOR || __NR_syz_extract_tcp_res && SYZ_TUN_ENABLE
2017-09-22 09:09:53 +00:00
#ifndef __ANDROID__
// Can't include <linux/ipv6.h>, since it causes
// conflicts due to some structs redefinition.
struct ipv6hdr {
__u8 priority : 4,
version : 4;
__u8 flow_lbl[3];
__be16 payload_len;
__u8 nexthdr;
__u8 hop_limit;
struct in6_addr saddr;
struct in6_addr daddr;
};
#endif
struct tcp_resources {
uint32 seq;
uint32 ack;
2017-09-22 09:09:53 +00:00
};
static long syz_extract_tcp_res(long a0, long a1, long a2)
2017-09-22 09:09:53 +00:00
{
// syz_extract_tcp_res(res ptr[out, tcp_resources], seq_inc int32, ack_inc int32)
if (tunfd < 0)
return (uintptr_t)-1;
char data[SYZ_TUN_MAX_PACKET_SIZE];
int rv = read_tun(&data[0], sizeof(data));
if (rv == -1)
return (uintptr_t)-1;
size_t length = rv;
debug_dump_data(data, length);
struct tcphdr* tcphdr;
if (length < sizeof(struct ethhdr))
return (uintptr_t)-1;
struct ethhdr* ethhdr = (struct ethhdr*)&data[0];
if (ethhdr->h_proto == htons(ETH_P_IP)) {
if (length < sizeof(struct ethhdr) + sizeof(struct iphdr))
return (uintptr_t)-1;
struct iphdr* iphdr = (struct iphdr*)&data[sizeof(struct ethhdr)];
if (iphdr->protocol != IPPROTO_TCP)
return (uintptr_t)-1;
if (length < sizeof(struct ethhdr) + iphdr->ihl * 4 + sizeof(struct tcphdr))
return (uintptr_t)-1;
tcphdr = (struct tcphdr*)&data[sizeof(struct ethhdr) + iphdr->ihl * 4];
} else {
if (length < sizeof(struct ethhdr) + sizeof(struct ipv6hdr))
return (uintptr_t)-1;
struct ipv6hdr* ipv6hdr = (struct ipv6hdr*)&data[sizeof(struct ethhdr)];
// TODO: parse and skip extension headers.
if (ipv6hdr->nexthdr != IPPROTO_TCP)
return (uintptr_t)-1;
if (length < sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
return (uintptr_t)-1;
tcphdr = (struct tcphdr*)&data[sizeof(struct ethhdr) + sizeof(struct ipv6hdr)];
}
struct tcp_resources* res = (struct tcp_resources*)a0;
NONFAILING(res->seq = htonl((ntohl(tcphdr->seq) + (uint32)a1)));
NONFAILING(res->ack = htonl((ntohl(tcphdr->ack_seq) + (uint32)a2)));
2017-09-22 09:09:53 +00:00
debug("extracted seq: %08x\n", res->seq);
debug("extracted ack: %08x\n", res->ack);
return 0;
}
#endif
#if SYZ_EXECUTOR || __NR_syz_open_dev
#include <fcntl.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
static long syz_open_dev(long a0, long a1, long a2)
2017-09-22 09:09:53 +00:00
{
if (a0 == 0xc || a0 == 0xb) {
// syz_open_dev$char(dev const[0xc], major intptr, minor intptr) fd
// syz_open_dev$block(dev const[0xb], major intptr, minor intptr) fd
char buf[128];
sprintf(buf, "/dev/%s/%d:%d", a0 == 0xc ? "char" : "block", (uint8)a1, (uint8)a2);
2017-09-22 09:09:53 +00:00
return open(buf, O_RDWR, 0);
} else {
// syz_open_dev(dev strconst, id intptr, flags flags[open_flags]) fd
char buf[1024];
char* hash;
NONFAILING(strncpy(buf, (char*)a0, sizeof(buf) - 1));
2017-09-22 09:09:53 +00:00
buf[sizeof(buf) - 1] = 0;
while ((hash = strchr(buf, '#'))) {
*hash = '0' + (char)(a1 % 10); // 10 devices should be enough for everyone.
a1 /= 10;
}
return open(buf, a2, 0);
}
}
#endif
#if SYZ_EXECUTOR || __NR_syz_open_procfs
#include <fcntl.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
static long syz_open_procfs(long a0, long a1)
2017-11-27 08:08:59 +00:00
{
// syz_open_procfs(pid pid, file ptr[in, string[procfs_file]]) fd
char buf[128];
memset(buf, 0, sizeof(buf));
if (a0 == 0) {
NONFAILING(snprintf(buf, sizeof(buf), "/proc/self/%s", (char*)a1));
} else if (a0 == -1) {
2017-11-27 08:08:59 +00:00
NONFAILING(snprintf(buf, sizeof(buf), "/proc/thread-self/%s", (char*)a1));
} else {
NONFAILING(snprintf(buf, sizeof(buf), "/proc/self/task/%d/%s", (int)a0, (char*)a1));
}
int fd = open(buf, O_RDWR);
if (fd == -1)
fd = open(buf, O_RDONLY);
return fd;
}
#endif
#if SYZ_EXECUTOR || __NR_syz_open_pts
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/types.h>
static long syz_open_pts(long a0, long a1)
2017-09-22 09:09:53 +00:00
{
// syz_openpts(fd fd[tty], flags flags[open_flags]) fd[tty]
int ptyno = 0;
if (ioctl(a0, TIOCGPTN, &ptyno))
return -1;
char buf[128];
sprintf(buf, "/dev/pts/%d", ptyno);
return open(buf, a1, 0);
}
#endif
#if SYZ_EXECUTOR || __NR_syz_init_net_socket
#if SYZ_EXECUTOR || SYZ_SANDBOX_NONE || SYZ_SANDBOX_SETUID || SYZ_SANDBOX_NAMESPACE || SYZ_SANDBOX_ANDROID_UNTRUSTED_APP
#include <fcntl.h>
#include <sched.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
const int kInitNetNsFd = 239; // see kMaxFd
// syz_init_net_socket opens a socket in init net namespace.
// Used for families that can only be created in init net namespace.
static long syz_init_net_socket(long domain, long type, long proto)
{
int netns = open("/proc/self/ns/net", O_RDONLY);
if (netns == -1)
return netns;
if (setns(kInitNetNsFd, 0))
return -1;
int sock = syscall(__NR_socket, domain, type, proto);
int err = errno;
if (setns(netns, 0))
fail("setns(netns) failed");
close(netns);
errno = err;
return sock;
}
#else
static long syz_init_net_socket(long domain, long type, long proto)
{
return syscall(__NR_socket, domain, type, proto);
}
#endif
#endif
#if SYZ_EXECUTOR || __NR_syz_genetlink_get_family_id
#include <errno.h>
#include <linux/genetlink.h>
#include <linux/netlink.h>
#include <sys/socket.h>
#include <sys/types.h>
static long syz_genetlink_get_family_id(long name)
{
char buf[512] = {0};
struct nlmsghdr* hdr = (struct nlmsghdr*)buf;
struct genlmsghdr* genlhdr = (struct genlmsghdr*)NLMSG_DATA(hdr);
struct nlattr* attr = (struct nlattr*)(genlhdr + 1);
hdr->nlmsg_len = sizeof(*hdr) + sizeof(*genlhdr) + sizeof(*attr) + GENL_NAMSIZ;
hdr->nlmsg_type = GENL_ID_CTRL;
hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
genlhdr->cmd = CTRL_CMD_GETFAMILY;
attr->nla_type = CTRL_ATTR_FAMILY_NAME;
attr->nla_len = sizeof(*attr) + GENL_NAMSIZ;
NONFAILING(strncpy((char*)(attr + 1), (char*)name, GENL_NAMSIZ));
struct iovec iov = {hdr, hdr->nlmsg_len};
struct sockaddr_nl addr = {0};
addr.nl_family = AF_NETLINK;
debug("syz_genetlink_get_family_id(%s)\n", (char*)(attr + 1));
int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
if (fd == -1) {
debug("syz_genetlink_get_family_id: socket failed: %d\n", errno);
return -1;
}
struct msghdr msg = {&addr, sizeof(addr), &iov, 1, NULL, 0, 0};
if (sendmsg(fd, &msg, 0) == -1) {
debug("syz_genetlink_get_family_id: sendmsg failed: %d\n", errno);
close(fd);
return -1;
}
ssize_t n = recv(fd, buf, sizeof(buf), 0);
close(fd);
if (n <= 0) {
debug("syz_genetlink_get_family_id: recv failed: %d\n", errno);
return -1;
}
if (hdr->nlmsg_type != GENL_ID_CTRL) {
debug("syz_genetlink_get_family_id: wrong reply type: %d\n", hdr->nlmsg_type);
return -1;
}
for (; (char*)attr < buf + n; attr = (struct nlattr*)((char*)attr + NLMSG_ALIGN(attr->nla_len))) {
if (attr->nla_type == CTRL_ATTR_FAMILY_ID)
return *(uint16*)(attr + 1);
}
debug("syz_genetlink_get_family_id: no CTRL_ATTR_FAMILY_ID attr\n");
return -1;
}
#endif
#if SYZ_EXECUTOR || __NR_syz_mount_image || __NR_syz_read_part_table
#include <errno.h>
#include <fcntl.h>
#include <linux/loop.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/types.h>
struct fs_image_segment {
void* data;
uintptr_t size;
uintptr_t offset;
};
#define IMAGE_MAX_SEGMENTS 4096
#define IMAGE_MAX_SIZE (129 << 20)
#if GOARCH_386
#define SYZ_memfd_create 356
#elif GOARCH_amd64
#define SYZ_memfd_create 319
#elif GOARCH_arm
#define SYZ_memfd_create 385
#elif GOARCH_arm64
#define SYZ_memfd_create 279
#elif GOARCH_ppc64le
#define SYZ_memfd_create 360
#endif
#endif
#if SYZ_EXECUTOR || __NR_syz_read_part_table
// syz_read_part_table(size intptr, nsegs len[segments], segments ptr[in, array[fs_image_segment]])
static long syz_read_part_table(unsigned long size, unsigned long nsegs, long segments)
{
char loopname[64], linkname[64];
int loopfd, err = 0, res = -1;
unsigned long i, j;
// See the comment in syz_mount_image.
struct fs_image_segment* segs = (struct fs_image_segment*)segments;
if (nsegs > IMAGE_MAX_SEGMENTS)
nsegs = IMAGE_MAX_SEGMENTS;
for (i = 0; i < nsegs; i++) {
if (segs[i].size > IMAGE_MAX_SIZE)
segs[i].size = IMAGE_MAX_SIZE;
segs[i].offset %= IMAGE_MAX_SIZE;
if (segs[i].offset > IMAGE_MAX_SIZE - segs[i].size)
segs[i].offset = IMAGE_MAX_SIZE - segs[i].size;
if (size < segs[i].offset + segs[i].offset)
size = segs[i].offset + segs[i].offset;
}
if (size > IMAGE_MAX_SIZE)
size = IMAGE_MAX_SIZE;
int memfd = syscall(SYZ_memfd_create, "syz_read_part_table", 0);
if (memfd == -1) {
err = errno;
goto error;
}
if (ftruncate(memfd, size)) {
err = errno;
goto error_close_memfd;
}
for (i = 0; i < nsegs; i++) {
if (pwrite(memfd, segs[i].data, segs[i].size, segs[i].offset) < 0) {
debug("syz_read_part_table: pwrite[%u] failed: %d\n", (int)i, errno);
}
}
snprintf(loopname, sizeof(loopname), "/dev/loop%llu", procid);
loopfd = open(loopname, O_RDWR);
if (loopfd == -1) {
err = errno;
goto error_close_memfd;
}
if (ioctl(loopfd, LOOP_SET_FD, memfd)) {
if (errno != EBUSY) {
err = errno;
goto error_close_loop;
}
ioctl(loopfd, LOOP_CLR_FD, 0);
usleep(1000);
if (ioctl(loopfd, LOOP_SET_FD, memfd)) {
err = errno;
goto error_close_loop;
}
}
struct loop_info64 info;
if (ioctl(loopfd, LOOP_GET_STATUS64, &info)) {
err = errno;
goto error_clear_loop;
}
#if SYZ_EXECUTOR
cover_reset(0);
#endif
info.lo_flags |= LO_FLAGS_PARTSCAN;
if (ioctl(loopfd, LOOP_SET_STATUS64, &info)) {
err = errno;
goto error_clear_loop;
}
res = 0;
// If we managed to parse some partitions, symlink them into our work dir.
for (i = 1, j = 0; i < 8; i++) {
snprintf(loopname, sizeof(loopname), "/dev/loop%llup%d", procid, (int)i);
struct stat statbuf;
if (stat(loopname, &statbuf) == 0) {
snprintf(linkname, sizeof(linkname), "./file%d", (int)j++);
2018-04-01 16:49:53 +00:00
if (symlink(loopname, linkname)) {
debug("syz_read_part_table: symlink(%s, %s) failed: %d\n", loopname, linkname, errno);
}
}
}
error_clear_loop:
ioctl(loopfd, LOOP_CLR_FD, 0);
error_close_loop:
close(loopfd);
error_close_memfd:
close(memfd);
error:
errno = err;
return res;
}
#endif
#if SYZ_EXECUTOR || __NR_syz_mount_image
#include <string.h>
#include <sys/mount.h>
//syz_mount_image(fs ptr[in, string[disk_filesystems]], dir ptr[in, filename], size intptr, nsegs len[segments], segments ptr[in, array[fs_image_segment]], flags flags[mount_flags], opts ptr[in, fs_options[vfat_options]])
//fs_image_segment {
// data ptr[in, array[int8]]
// size len[data, intptr]
// offset intptr
//}
static long syz_mount_image(long fsarg, long dir, unsigned long size, unsigned long nsegs, long segments, long flags, long optsarg)
{
char loopname[64], fs[32], opts[256];
int loopfd, err = 0, res = -1;
unsigned long i;
// Strictly saying we ought to do a nonfailing copyout of segments into a local var.
// But some filesystems have large number of segments (2000+),
// we can't allocate that much on stack and allocating elsewhere is problematic,
// so we just use the memory allocated by fuzzer.
struct fs_image_segment* segs = (struct fs_image_segment*)segments;
if (nsegs > IMAGE_MAX_SEGMENTS)
nsegs = IMAGE_MAX_SEGMENTS;
for (i = 0; i < nsegs; i++) {
if (segs[i].size > IMAGE_MAX_SIZE)
segs[i].size = IMAGE_MAX_SIZE;
segs[i].offset %= IMAGE_MAX_SIZE;
if (segs[i].offset > IMAGE_MAX_SIZE - segs[i].size)
segs[i].offset = IMAGE_MAX_SIZE - segs[i].size;
if (size < segs[i].offset + segs[i].offset)
size = segs[i].offset + segs[i].offset;
}
if (size > IMAGE_MAX_SIZE)
size = IMAGE_MAX_SIZE;
int memfd = syscall(SYZ_memfd_create, "syz_mount_image", 0);
if (memfd == -1) {
err = errno;
goto error;
}
if (ftruncate(memfd, size)) {
err = errno;
goto error_close_memfd;
}
for (i = 0; i < nsegs; i++) {
if (pwrite(memfd, segs[i].data, segs[i].size, segs[i].offset) < 0) {
debug("syz_mount_image: pwrite[%u] failed: %d\n", (int)i, errno);
}
}
snprintf(loopname, sizeof(loopname), "/dev/loop%llu", procid);
loopfd = open(loopname, O_RDWR);
if (loopfd == -1) {
err = errno;
goto error_close_memfd;
}
if (ioctl(loopfd, LOOP_SET_FD, memfd)) {
if (errno != EBUSY) {
err = errno;
goto error_close_loop;
}
ioctl(loopfd, LOOP_CLR_FD, 0);
usleep(1000);
if (ioctl(loopfd, LOOP_SET_FD, memfd)) {
err = errno;
goto error_close_loop;
}
}
mkdir((char*)dir, 0777);
memset(fs, 0, sizeof(fs));
NONFAILING(strncpy(fs, (char*)fsarg, sizeof(fs) - 1));
memset(opts, 0, sizeof(opts));
// Leave some space for the additional options we append below.
NONFAILING(strncpy(opts, (char*)optsarg, sizeof(opts) - 32));
if (strcmp(fs, "iso9660") == 0) {
flags |= MS_RDONLY;
} else if (strncmp(fs, "ext", 3) == 0) {
// For ext2/3/4 we have to have errors=continue because the image
// can contain errors=panic flag and can legally crash kernel.
if (strstr(opts, "errors=panic") || strstr(opts, "errors=remount-ro") == 0)
strcat(opts, ",errors=continue");
} else if (strcmp(fs, "xfs") == 0) {
// For xfs we need nouuid because xfs has a global uuids table
// and if two parallel executors mounts fs with the same uuid, second mount fails.
strcat(opts, ",nouuid");
}
debug("syz_mount_image: size=%llu segs=%llu loop='%s' dir='%s' fs='%s' flags=%llu opts='%s'\n", (uint64)size, (uint64)nsegs, loopname, (char*)dir, fs, (uint64)flags, opts);
#if SYZ_EXECUTOR
cover_reset(0);
#endif
if (mount(loopname, (char*)dir, fs, flags, opts)) {
err = errno;
goto error_clear_loop;
}
res = 0;
error_clear_loop:
ioctl(loopfd, LOOP_CLR_FD, 0);
error_close_loop:
close(loopfd);
error_close_memfd:
close(memfd);
error:
errno = err;
return res;
}
#endif
#if SYZ_EXECUTOR || __NR_syz_kvm_setup_cpu
#include <errno.h>
#include <fcntl.h>
#include <linux/kvm.h>
#include <stdarg.h>
#include <stddef.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#if GOARCH_amd64
2017-09-22 09:09:53 +00:00
#include "common_kvm_amd64.h"
#elif GOARCH_arm64
2017-09-22 09:09:53 +00:00
#include "common_kvm_arm64.h"
#else
static long syz_kvm_setup_cpu(long a0, long a1, long a2, long a3, long a4, long a5, long a6, long a7)
2017-09-22 09:09:53 +00:00
{
return 0;
}
#endif
#endif
#if SYZ_EXECUTOR || SYZ_FAULT_INJECTION || SYZ_SANDBOX_NAMESPACE || SYZ_ENABLE_CGROUPS
#include <errno.h>
#include <fcntl.h>
#include <stdarg.h>
#include <stdbool.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
2017-09-22 09:09:53 +00:00
2018-03-22 12:24:02 +00:00
static bool write_file(const char* file, const char* what, ...)
{
char buf[1024];
va_list args;
va_start(args, what);
vsnprintf(buf, sizeof(buf), what, args);
va_end(args);
buf[sizeof(buf) - 1] = 0;
int len = strlen(buf);
int fd = open(file, O_WRONLY | O_CLOEXEC);
if (fd == -1)
return false;
if (write(fd, buf, len) != len) {
int err = errno;
close(fd);
errno = err;
return false;
}
close(fd);
return true;
}
#endif
#if SYZ_EXECUTOR || SYZ_RESET_NET_NAMESPACE
#include <errno.h>
#include <linux/if.h>
#include <linux/net.h>
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>
2017-09-22 09:09:53 +00:00
// checkpoint/reset_net_namespace partially resets net namespace to initial state
// after each test. Currently it resets only ipv4 netfilter state.
// Ideally, we just create a new net namespace for each test,
// however it's too slow (1-1.5 seconds per namespace, not parallelizable).
// Linux headers do not compile for C++, so we have to define the structs manualy.
#define XT_TABLE_SIZE 1536
#define XT_MAX_ENTRIES 10
struct xt_counters {
uint64 pcnt, bcnt;
};
struct ipt_getinfo {
char name[32];
unsigned int valid_hooks;
unsigned int hook_entry[5];
unsigned int underflow[5];
unsigned int num_entries;
unsigned int size;
};
struct ipt_get_entries {
char name[32];
unsigned int size;
void* entrytable[XT_TABLE_SIZE / sizeof(void*)];
};
struct ipt_replace {
char name[32];
unsigned int valid_hooks;
unsigned int num_entries;
unsigned int size;
unsigned int hook_entry[5];
unsigned int underflow[5];
unsigned int num_counters;
struct xt_counters* counters;
char entrytable[XT_TABLE_SIZE];
};
struct ipt_table_desc {
const char* name;
struct ipt_getinfo info;
struct ipt_replace replace;
};
static struct ipt_table_desc ipv4_tables[] = {
{.name = "filter"},
{.name = "nat"},
{.name = "mangle"},
{.name = "raw"},
{.name = "security"},
};
static struct ipt_table_desc ipv6_tables[] = {
{.name = "filter"},
{.name = "nat"},
{.name = "mangle"},
{.name = "raw"},
{.name = "security"},
};
#define IPT_BASE_CTL 64
#define IPT_SO_SET_REPLACE (IPT_BASE_CTL)
#define IPT_SO_GET_INFO (IPT_BASE_CTL)
#define IPT_SO_GET_ENTRIES (IPT_BASE_CTL + 1)
struct arpt_getinfo {
char name[32];
unsigned int valid_hooks;
unsigned int hook_entry[3];
unsigned int underflow[3];
unsigned int num_entries;
unsigned int size;
};
struct arpt_get_entries {
char name[32];
unsigned int size;
void* entrytable[XT_TABLE_SIZE / sizeof(void*)];
};
struct arpt_replace {
char name[32];
unsigned int valid_hooks;
unsigned int num_entries;
unsigned int size;
unsigned int hook_entry[3];
unsigned int underflow[3];
unsigned int num_counters;
struct xt_counters* counters;
char entrytable[XT_TABLE_SIZE];
};
struct arpt_table_desc {
const char* name;
struct arpt_getinfo info;
struct arpt_replace replace;
};
static struct arpt_table_desc arpt_tables[] = {
{.name = "filter"},
};
#define ARPT_BASE_CTL 96
#define ARPT_SO_SET_REPLACE (ARPT_BASE_CTL)
#define ARPT_SO_GET_INFO (ARPT_BASE_CTL)
#define ARPT_SO_GET_ENTRIES (ARPT_BASE_CTL + 1)
static void checkpoint_iptables(struct ipt_table_desc* tables, int num_tables, int family, int level)
{
struct ipt_get_entries entries;
socklen_t optlen;
int fd, i;
fd = socket(family, SOCK_STREAM, IPPROTO_TCP);
if (fd == -1) {
switch (errno) {
case EAFNOSUPPORT:
case ENOPROTOOPT:
return;
}
fail("iptable checkpoint %d: socket failed", family);
}
for (i = 0; i < num_tables; i++) {
struct ipt_table_desc* table = &tables[i];
strcpy(table->info.name, table->name);
strcpy(table->replace.name, table->name);
optlen = sizeof(table->info);
if (getsockopt(fd, level, IPT_SO_GET_INFO, &table->info, &optlen)) {
switch (errno) {
case EPERM:
case ENOENT:
case ENOPROTOOPT:
continue;
}
fail("iptable checkpoint %s/%d: getsockopt(IPT_SO_GET_INFO)", table->name, family);
}
debug("iptable checkpoint %s/%d: checkpoint entries=%d hooks=%x size=%d\n",
table->name, family, table->info.num_entries,
table->info.valid_hooks, table->info.size);
if (table->info.size > sizeof(table->replace.entrytable))
fail("iptable checkpoint %s/%d: table size is too large: %u",
table->name, family, table->info.size);
if (table->info.num_entries > XT_MAX_ENTRIES)
fail("iptable checkpoint %s/%d: too many counters: %u",
table->name, family, table->info.num_entries);
memset(&entries, 0, sizeof(entries));
strcpy(entries.name, table->name);
entries.size = table->info.size;
optlen = sizeof(entries) - sizeof(entries.entrytable) + table->info.size;
if (getsockopt(fd, level, IPT_SO_GET_ENTRIES, &entries, &optlen))
fail("iptable checkpoint %s/%d: getsockopt(IPT_SO_GET_ENTRIES)",
table->name, family);
table->replace.valid_hooks = table->info.valid_hooks;
table->replace.num_entries = table->info.num_entries;
table->replace.size = table->info.size;
memcpy(table->replace.hook_entry, table->info.hook_entry, sizeof(table->replace.hook_entry));
memcpy(table->replace.underflow, table->info.underflow, sizeof(table->replace.underflow));
memcpy(table->replace.entrytable, entries.entrytable, table->info.size);
}
close(fd);
}
static void reset_iptables(struct ipt_table_desc* tables, int num_tables, int family, int level)
{
struct xt_counters counters[XT_MAX_ENTRIES];
struct ipt_get_entries entries;
struct ipt_getinfo info;
socklen_t optlen;
int fd, i;
fd = socket(family, SOCK_STREAM, IPPROTO_TCP);
if (fd == -1) {
switch (errno) {
case EAFNOSUPPORT:
case ENOPROTOOPT:
return;
}
fail("iptable %d: socket failed", family);
}
for (i = 0; i < num_tables; i++) {
struct ipt_table_desc* table = &tables[i];
if (table->info.valid_hooks == 0)
continue;
memset(&info, 0, sizeof(info));
strcpy(info.name, table->name);
optlen = sizeof(info);
if (getsockopt(fd, level, IPT_SO_GET_INFO, &info, &optlen))
fail("iptable %s/%d: getsockopt(IPT_SO_GET_INFO)", table->name, family);
if (memcmp(&table->info, &info, sizeof(table->info)) == 0) {
memset(&entries, 0, sizeof(entries));
strcpy(entries.name, table->name);
entries.size = table->info.size;
optlen = sizeof(entries) - sizeof(entries.entrytable) + entries.size;
if (getsockopt(fd, level, IPT_SO_GET_ENTRIES, &entries, &optlen))
fail("iptable %s/%d: getsockopt(IPT_SO_GET_ENTRIES)", table->name, family);
if (memcmp(table->replace.entrytable, entries.entrytable, table->info.size) == 0)
continue;
}
debug("iptable %s/%d: resetting\n", table->name, family);
table->replace.num_counters = info.num_entries;
table->replace.counters = counters;
optlen = sizeof(table->replace) - sizeof(table->replace.entrytable) + table->replace.size;
if (setsockopt(fd, level, IPT_SO_SET_REPLACE, &table->replace, optlen))
fail("iptable %s/%d: setsockopt(IPT_SO_SET_REPLACE)", table->name, family);
}
close(fd);
}
static void checkpoint_arptables(void)
{
struct arpt_get_entries entries;
socklen_t optlen;
unsigned i;
int fd;
fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
if (fd == -1) {
switch (errno) {
case EAFNOSUPPORT:
case ENOPROTOOPT:
return;
}
fail("arptable checkpoint: socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)");
}
for (i = 0; i < sizeof(arpt_tables) / sizeof(arpt_tables[0]); i++) {
struct arpt_table_desc* table = &arpt_tables[i];
strcpy(table->info.name, table->name);
strcpy(table->replace.name, table->name);
optlen = sizeof(table->info);
if (getsockopt(fd, SOL_IP, ARPT_SO_GET_INFO, &table->info, &optlen)) {
switch (errno) {
case EPERM:
case ENOENT:
case ENOPROTOOPT:
continue;
}
fail("arptable checkpoint %s: getsockopt(ARPT_SO_GET_INFO)", table->name);
}
debug("arptable checkpoint %s: entries=%d hooks=%x size=%d\n",
table->name, table->info.num_entries, table->info.valid_hooks, table->info.size);
if (table->info.size > sizeof(table->replace.entrytable))
fail("arptable checkpoint %s: table size is too large: %u",
table->name, table->info.size);
if (table->info.num_entries > XT_MAX_ENTRIES)
fail("arptable checkpoint %s: too many counters: %u",
table->name, table->info.num_entries);
memset(&entries, 0, sizeof(entries));
strcpy(entries.name, table->name);
entries.size = table->info.size;
optlen = sizeof(entries) - sizeof(entries.entrytable) + table->info.size;
if (getsockopt(fd, SOL_IP, ARPT_SO_GET_ENTRIES, &entries, &optlen))
fail("arptable checkpoint %s: getsockopt(ARPT_SO_GET_ENTRIES)", table->name);
table->replace.valid_hooks = table->info.valid_hooks;
table->replace.num_entries = table->info.num_entries;
table->replace.size = table->info.size;
memcpy(table->replace.hook_entry, table->info.hook_entry, sizeof(table->replace.hook_entry));
memcpy(table->replace.underflow, table->info.underflow, sizeof(table->replace.underflow));
memcpy(table->replace.entrytable, entries.entrytable, table->info.size);
}
close(fd);
}
static void reset_arptables()
{
struct xt_counters counters[XT_MAX_ENTRIES];
struct arpt_get_entries entries;
struct arpt_getinfo info;
socklen_t optlen;
unsigned i;
int fd;
fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
if (fd == -1) {
switch (errno) {
case EAFNOSUPPORT:
case ENOPROTOOPT:
return;
}
fail("arptable: socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)");
}
for (i = 0; i < sizeof(arpt_tables) / sizeof(arpt_tables[0]); i++) {
struct arpt_table_desc* table = &arpt_tables[i];
if (table->info.valid_hooks == 0)
continue;
memset(&info, 0, sizeof(info));
strcpy(info.name, table->name);
optlen = sizeof(info);
if (getsockopt(fd, SOL_IP, ARPT_SO_GET_INFO, &info, &optlen))
fail("arptable %s:getsockopt(ARPT_SO_GET_INFO)", table->name);
if (memcmp(&table->info, &info, sizeof(table->info)) == 0) {
memset(&entries, 0, sizeof(entries));
strcpy(entries.name, table->name);
entries.size = table->info.size;
optlen = sizeof(entries) - sizeof(entries.entrytable) + entries.size;
if (getsockopt(fd, SOL_IP, ARPT_SO_GET_ENTRIES, &entries, &optlen))
fail("arptable %s: getsockopt(ARPT_SO_GET_ENTRIES)", table->name);
if (memcmp(table->replace.entrytable, entries.entrytable, table->info.size) == 0)
continue;
debug("arptable %s: data changed\n", table->name);
} else {
debug("arptable %s: header changed\n", table->name);
}
debug("arptable %s: resetting\n", table->name);
table->replace.num_counters = info.num_entries;
table->replace.counters = counters;
optlen = sizeof(table->replace) - sizeof(table->replace.entrytable) + table->replace.size;
if (setsockopt(fd, SOL_IP, ARPT_SO_SET_REPLACE, &table->replace, optlen))
fail("arptable %s: setsockopt(ARPT_SO_SET_REPLACE)", table->name);
}
close(fd);
}
// ebtables.h is broken too:
// ebtables.h: In function ebt_entry_target* ebt_get_target(ebt_entry*):
// ebtables.h:197:19: error: invalid conversion from void* to ebt_entry_target*
#define NF_BR_NUMHOOKS 6
#define EBT_TABLE_MAXNAMELEN 32
#define EBT_CHAIN_MAXNAMELEN 32
#define EBT_BASE_CTL 128
#define EBT_SO_SET_ENTRIES (EBT_BASE_CTL)
#define EBT_SO_GET_INFO (EBT_BASE_CTL)
#define EBT_SO_GET_ENTRIES (EBT_SO_GET_INFO + 1)
#define EBT_SO_GET_INIT_INFO (EBT_SO_GET_ENTRIES + 1)
#define EBT_SO_GET_INIT_ENTRIES (EBT_SO_GET_INIT_INFO + 1)
struct ebt_replace {
char name[EBT_TABLE_MAXNAMELEN];
unsigned int valid_hooks;
unsigned int nentries;
unsigned int entries_size;
struct ebt_entries* hook_entry[NF_BR_NUMHOOKS];
unsigned int num_counters;
struct ebt_counter* counters;
char* entries;
};
struct ebt_entries {
unsigned int distinguisher;
char name[EBT_CHAIN_MAXNAMELEN];
unsigned int counter_offset;
int policy;
unsigned int nentries;
char data[0] __attribute__((aligned(__alignof__(struct ebt_replace))));
};
struct ebt_table_desc {
const char* name;
struct ebt_replace replace;
char entrytable[XT_TABLE_SIZE];
};
static struct ebt_table_desc ebt_tables[] = {
{.name = "filter"},
{.name = "nat"},
{.name = "broute"},
};
static void checkpoint_ebtables(void)
{
socklen_t optlen;
unsigned i;
int fd;
fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
if (fd == -1) {
switch (errno) {
case EAFNOSUPPORT:
case ENOPROTOOPT:
return;
}
fail("ebtable checkpoint: socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)");
}
for (i = 0; i < sizeof(ebt_tables) / sizeof(ebt_tables[0]); i++) {
struct ebt_table_desc* table = &ebt_tables[i];
strcpy(table->replace.name, table->name);
optlen = sizeof(table->replace);
if (getsockopt(fd, SOL_IP, EBT_SO_GET_INIT_INFO, &table->replace, &optlen)) {
switch (errno) {
case EPERM:
case ENOENT:
case ENOPROTOOPT:
continue;
}
fail("ebtable checkpoint %s: getsockopt(EBT_SO_GET_INIT_INFO)", table->name);
}
debug("ebtable checkpoint %s: entries=%d hooks=%x size=%d\n",
table->name, table->replace.nentries, table->replace.valid_hooks,
table->replace.entries_size);
if (table->replace.entries_size > sizeof(table->entrytable))
fail("ebtable checkpoint %s: table size is too large: %u",
table->name, table->replace.entries_size);
table->replace.num_counters = 0;
table->replace.entries = table->entrytable;
optlen = sizeof(table->replace) + table->replace.entries_size;
if (getsockopt(fd, SOL_IP, EBT_SO_GET_INIT_ENTRIES, &table->replace, &optlen))
fail("ebtable checkpoint %s: getsockopt(EBT_SO_GET_INIT_ENTRIES)", table->name);
}
close(fd);
}
static void reset_ebtables()
{
struct ebt_replace replace;
char entrytable[XT_TABLE_SIZE];
socklen_t optlen;
unsigned i, j, h;
int fd;
fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
if (fd == -1) {
switch (errno) {
case EAFNOSUPPORT:
case ENOPROTOOPT:
return;
}
fail("ebtable: socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)");
}
for (i = 0; i < sizeof(ebt_tables) / sizeof(ebt_tables[0]); i++) {
struct ebt_table_desc* table = &ebt_tables[i];
if (table->replace.valid_hooks == 0)
continue;
memset(&replace, 0, sizeof(replace));
strcpy(replace.name, table->name);
optlen = sizeof(replace);
if (getsockopt(fd, SOL_IP, EBT_SO_GET_INFO, &replace, &optlen))
fail("ebtable %s: getsockopt(EBT_SO_GET_INFO)", table->name);
replace.num_counters = 0;
table->replace.entries = 0;
for (h = 0; h < NF_BR_NUMHOOKS; h++)
table->replace.hook_entry[h] = 0;
if (memcmp(&table->replace, &replace, sizeof(table->replace)) == 0) {
memset(&entrytable, 0, sizeof(entrytable));
replace.entries = entrytable;
optlen = sizeof(replace) + replace.entries_size;
if (getsockopt(fd, SOL_IP, EBT_SO_GET_ENTRIES, &replace, &optlen))
fail("ebtable %s: getsockopt(EBT_SO_GET_ENTRIES)", table->name);
if (memcmp(table->entrytable, entrytable, replace.entries_size) == 0)
continue;
}
debug("ebtable %s: resetting\n", table->name);
// Kernel does not seem to return actual entry points (wat?).
for (j = 0, h = 0; h < NF_BR_NUMHOOKS; h++) {
if (table->replace.valid_hooks & (1 << h)) {
table->replace.hook_entry[h] = (struct ebt_entries*)table->entrytable + j;
j++;
}
}
table->replace.entries = table->entrytable;
optlen = sizeof(table->replace) + table->replace.entries_size;
if (setsockopt(fd, SOL_IP, EBT_SO_SET_ENTRIES, &table->replace, optlen))
fail("ebtable %s: setsockopt(EBT_SO_SET_ENTRIES)", table->name);
}
close(fd);
}
static void checkpoint_net_namespace(void)
{
#if SYZ_EXECUTOR
if (flag_sandbox == sandbox_setuid)
return;
#endif
checkpoint_ebtables();
checkpoint_arptables();
checkpoint_iptables(ipv4_tables, sizeof(ipv4_tables) / sizeof(ipv4_tables[0]), AF_INET, SOL_IP);
checkpoint_iptables(ipv6_tables, sizeof(ipv6_tables) / sizeof(ipv6_tables[0]), AF_INET6, SOL_IPV6);
}
static void reset_net_namespace(void)
{
#if SYZ_EXECUTOR
if (flag_sandbox == sandbox_setuid)
return;
#endif
reset_ebtables();
reset_arptables();
reset_iptables(ipv4_tables, sizeof(ipv4_tables) / sizeof(ipv4_tables[0]), AF_INET, SOL_IP);
reset_iptables(ipv6_tables, sizeof(ipv6_tables) / sizeof(ipv6_tables[0]), AF_INET6, SOL_IPV6);
}
#endif
#if SYZ_EXECUTOR || SYZ_ENABLE_CGROUPS
#include <fcntl.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/types.h>
static void setup_cgroups()
{
if (mkdir("/syzcgroup", 0777)) {
debug("mkdir(/syzcgroup) failed: %d\n", errno);
}
if (mkdir("/syzcgroup/unified", 0777)) {
debug("mkdir(/syzcgroup/unified) failed: %d\n", errno);
}
if (mount("none", "/syzcgroup/unified", "cgroup2", 0, NULL)) {
debug("mount(cgroup2) failed: %d\n", errno);
}
if (chmod("/syzcgroup/unified", 0777)) {
debug("chmod(/syzcgroup/unified) failed: %d\n", errno);
}
if (!write_file("/syzcgroup/unified/cgroup.subtree_control", "+cpu +memory +io +pids +rdma")) {
debug("write(cgroup.subtree_control) failed: %d\n", errno);
}
if (mkdir("/syzcgroup/cpu", 0777)) {
debug("mkdir(/syzcgroup/cpu) failed: %d\n", errno);
}
if (mount("none", "/syzcgroup/cpu", "cgroup", 0, "cpuset,cpuacct,perf_event,hugetlb")) {
debug("mount(cgroup cpu) failed: %d\n", errno);
}
if (!write_file("/syzcgroup/cpu/cgroup.clone_children", "1")) {
debug("write(/syzcgroup/cpu/cgroup.clone_children) failed: %d\n", errno);
}
if (chmod("/syzcgroup/cpu", 0777)) {
debug("chmod(/syzcgroup/cpu) failed: %d\n", errno);
}
if (mkdir("/syzcgroup/net", 0777)) {
debug("mkdir(/syzcgroup/net) failed: %d\n", errno);
}
if (mount("none", "/syzcgroup/net", "cgroup", 0, "net_cls,net_prio,devices,freezer")) {
debug("mount(cgroup net) failed: %d\n", errno);
}
if (chmod("/syzcgroup/net", 0777)) {
debug("chmod(/syzcgroup/net) failed: %d\n", errno);
}
}
// TODO(dvyukov): this should be under a separate define for separate minimization,
// but for now we bundle this with cgroups.
static void setup_binfmt_misc()
{
if (mount(0, "/proc/sys/fs/binfmt_misc", "binfmt_misc", 0, 0)) {
debug("mount(binfmt_misc) failed: %d\n", errno);
}
if (!write_file("/proc/sys/fs/binfmt_misc/register", ":syz0:M:0:\x01::./file0:")) {
debug("write(/proc/sys/fs/binfmt_misc/register, syz0) failed: %d\n", errno);
}
if (!write_file("/proc/sys/fs/binfmt_misc/register", ":syz1:M:1:\x02::./file0:POC")) {
debug("write(/proc/sys/fs/binfmt_misc/register, syz1) failed: %d\n", errno);
}
}
#endif
#if SYZ_EXECUTOR || SYZ_SANDBOX_NONE || SYZ_SANDBOX_SETUID || SYZ_SANDBOX_NAMESPACE || SYZ_SANDBOX_ANDROID_UNTRUSTED_APP
#include <errno.h>
#include <sys/mount.h>
static void setup_common()
{
if (mount(0, "/sys/fs/fuse/connections", "fusectl", 0, 0)) {
debug("mount(fusectl) failed: %d\n", errno);
}
#if SYZ_EXECUTOR || SYZ_ENABLE_CGROUPS
setup_cgroups();
setup_binfmt_misc();
#endif
}
#include <sched.h>
#include <sys/prctl.h>
#include <sys/resource.h>
#include <sys/time.h>
#include <sys/wait.h>
static void loop();
static void sandbox_common()
{
prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
setpgrp();
setsid();
#if SYZ_EXECUTOR || __NR_syz_init_net_socket
int netns = open("/proc/self/ns/net", O_RDONLY);
if (netns == -1)
fail("open(/proc/self/ns/net) failed");
if (dup2(netns, kInitNetNsFd) < 0)
fail("dup2(netns, kInitNetNsFd) failed");
close(netns);
#endif
struct rlimit rlim;
rlim.rlim_cur = rlim.rlim_max = 200 << 20;
setrlimit(RLIMIT_AS, &rlim);
rlim.rlim_cur = rlim.rlim_max = 32 << 20;
setrlimit(RLIMIT_MEMLOCK, &rlim);
rlim.rlim_cur = rlim.rlim_max = 136 << 20;
setrlimit(RLIMIT_FSIZE, &rlim);
rlim.rlim_cur = rlim.rlim_max = 1 << 20;
setrlimit(RLIMIT_STACK, &rlim);
rlim.rlim_cur = rlim.rlim_max = 0;
setrlimit(RLIMIT_CORE, &rlim);
rlim.rlim_cur = rlim.rlim_max = 256; // see kMaxFd
setrlimit(RLIMIT_NOFILE, &rlim);
// CLONE_NEWNS/NEWCGROUP cause EINVAL on some systems,
// so we do them separately of clone in do_sandbox_namespace.
if (unshare(CLONE_NEWNS)) {
debug("unshare(CLONE_NEWNS): %d\n", errno);
}
if (unshare(CLONE_NEWIPC)) {
debug("unshare(CLONE_NEWIPC): %d\n", errno);
}
if (unshare(0x02000000)) {
debug("unshare(CLONE_NEWCGROUP): %d\n", errno);
}
if (unshare(CLONE_NEWUTS)) {
debug("unshare(CLONE_NEWUTS): %d\n", errno);
}
if (unshare(CLONE_SYSVSEM)) {
debug("unshare(CLONE_SYSVSEM): %d\n", errno);
}
}
int wait_for_loop(int pid)
{
if (pid < 0)
fail("sandbox fork failed");
debug("spawned loop pid %d\n", pid);
int status = 0;
while (waitpid(-1, &status, __WALL) != pid) {
}
return WEXITSTATUS(status);
}
#endif
#if SYZ_EXECUTOR || SYZ_SANDBOX_NONE
#include <sched.h>
#include <sys/types.h>
static int do_sandbox_none(void)
{
// CLONE_NEWPID takes effect for the first child of the current process,
// so we do it before fork to make the loop "init" process of the namespace.
// We ought to do fail here, but sandbox=none is used in pkg/ipc tests
// and they are usually run under non-root.
// Also since debug is stripped by pkg/csource, we need to do {}
// even though we generally don't do {} around single statements.
if (unshare(CLONE_NEWPID)) {
debug("unshare(CLONE_NEWPID): %d\n", errno);
}
int pid = fork();
if (pid != 0)
return wait_for_loop(pid);
setup_common();
sandbox_common();
if (unshare(CLONE_NEWNET)) {
debug("unshare(CLONE_NEWNET): %d\n", errno);
}
#if SYZ_EXECUTOR || SYZ_TUN_ENABLE
initialize_tun();
#endif
#if SYZ_EXECUTOR || SYZ_ENABLE_NETDEV
initialize_netdevices();
#endif
loop();
doexit(1);
}
#endif
#if SYZ_EXECUTOR || SYZ_SANDBOX_SETUID
#include <grp.h>
#include <sched.h>
#include <sys/prctl.h>
#define SYZ_HAVE_SANDBOX_SETUID 1
static int do_sandbox_setuid(void)
{
if (unshare(CLONE_NEWPID)) {
debug("unshare(CLONE_NEWPID): %d\n", errno);
}
int pid = fork();
if (pid != 0)
return wait_for_loop(pid);
setup_common();
sandbox_common();
if (unshare(CLONE_NEWNET)) {
debug("unshare(CLONE_NEWNET): %d\n", errno);
}
#if SYZ_EXECUTOR || SYZ_TUN_ENABLE
initialize_tun();
#endif
#if SYZ_EXECUTOR || SYZ_ENABLE_NETDEV
initialize_netdevices();
#endif
const int nobody = 65534;
if (setgroups(0, NULL))
fail("failed to setgroups");
if (syscall(SYS_setresgid, nobody, nobody, nobody))
fail("failed to setresgid");
if (syscall(SYS_setresuid, nobody, nobody, nobody))
fail("failed to setresuid");
// This is required to open /proc/self/* files.
// Otherwise they are owned by root and we can't open them after setuid.
// See task_dump_owner function in kernel.
prctl(PR_SET_DUMPABLE, 1, 0, 0, 0);
loop();
doexit(1);
}
#endif
#if SYZ_EXECUTOR || SYZ_SANDBOX_NAMESPACE
#include <linux/capability.h>
#include <sched.h>
#include <sys/mman.h>
#include <sys/mount.h>
static int real_uid;
static int real_gid;
__attribute__((aligned(64 << 10))) static char sandbox_stack[1 << 20];
static int namespace_sandbox_proc(void* arg)
{
sandbox_common();
// /proc/self/setgroups is not present on some systems, ignore error.
write_file("/proc/self/setgroups", "deny");
if (!write_file("/proc/self/uid_map", "0 %d 1\n", real_uid))
fail("write of /proc/self/uid_map failed");
if (!write_file("/proc/self/gid_map", "0 %d 1\n", real_gid))
fail("write of /proc/self/gid_map failed");
// CLONE_NEWNET must always happen before tun setup,
// because we want the tun device in the test namespace.
if (unshare(CLONE_NEWNET))
fail("unshare(CLONE_NEWNET)");
#if SYZ_EXECUTOR || SYZ_TUN_ENABLE
// We setup tun here as it needs to be in the test net namespace,
// which in turn needs to be in the test user namespace.
// However, IFF_NAPI_FRAGS will fail as we are not root already.
2018-12-12 12:14:26 +00:00
// TODO: we should create tun in the init net namespace and use setns
// to move it to the target namespace.
initialize_tun();
#endif
#if SYZ_EXECUTOR || SYZ_ENABLE_NETDEV
initialize_netdevices();
#endif
if (mkdir("./syz-tmp", 0777))
fail("mkdir(syz-tmp) failed");
if (mount("", "./syz-tmp", "tmpfs", 0, NULL))
fail("mount(tmpfs) failed");
if (mkdir("./syz-tmp/newroot", 0777))
fail("mkdir failed");
if (mkdir("./syz-tmp/newroot/dev", 0700))
fail("mkdir failed");
unsigned bind_mount_flags = MS_BIND | MS_REC | MS_PRIVATE;
if (mount("/dev", "./syz-tmp/newroot/dev", NULL, bind_mount_flags, NULL))
fail("mount(dev) failed");
if (mkdir("./syz-tmp/newroot/proc", 0700))
fail("mkdir failed");
if (mount(NULL, "./syz-tmp/newroot/proc", "proc", 0, NULL))
fail("mount(proc) failed");
if (mkdir("./syz-tmp/newroot/selinux", 0700))
fail("mkdir failed");
// selinux mount used to be at /selinux, but then moved to /sys/fs/selinux.
const char* selinux_path = "./syz-tmp/newroot/selinux";
if (mount("/selinux", selinux_path, NULL, bind_mount_flags, NULL)) {
if (errno != ENOENT)
fail("mount(/selinux) failed");
if (mount("/sys/fs/selinux", selinux_path, NULL, bind_mount_flags, NULL) && errno != ENOENT)
fail("mount(/sys/fs/selinux) failed");
}
if (mkdir("./syz-tmp/newroot/sys", 0700))
fail("mkdir failed");
if (mount("/sys", "./syz-tmp/newroot/sys", 0, bind_mount_flags, NULL))
fail("mount(sysfs) failed");
#if SYZ_EXECUTOR || SYZ_ENABLE_CGROUPS
if (mkdir("./syz-tmp/newroot/syzcgroup", 0700))
fail("mkdir failed");
if (mkdir("./syz-tmp/newroot/syzcgroup/unified", 0700))
fail("mkdir failed");
if (mkdir("./syz-tmp/newroot/syzcgroup/cpu", 0700))
fail("mkdir failed");
if (mkdir("./syz-tmp/newroot/syzcgroup/net", 0700))
fail("mkdir failed");
if (mount("/syzcgroup/unified", "./syz-tmp/newroot/syzcgroup/unified", NULL, bind_mount_flags, NULL)) {
debug("mount(cgroup2, MS_BIND) failed: %d\n", errno);
}
if (mount("/syzcgroup/cpu", "./syz-tmp/newroot/syzcgroup/cpu", NULL, bind_mount_flags, NULL)) {
debug("mount(cgroup/cpu, MS_BIND) failed: %d\n", errno);
}
if (mount("/syzcgroup/net", "./syz-tmp/newroot/syzcgroup/net", NULL, bind_mount_flags, NULL)) {
debug("mount(cgroup/net, MS_BIND) failed: %d\n", errno);
}
#endif
if (mkdir("./syz-tmp/pivot", 0777))
fail("mkdir failed");
if (syscall(SYS_pivot_root, "./syz-tmp", "./syz-tmp/pivot")) {
debug("pivot_root failed\n");
if (chdir("./syz-tmp"))
fail("chdir failed");
} else {
debug("pivot_root OK\n");
if (chdir("/"))
fail("chdir failed");
if (umount2("./pivot", MNT_DETACH))
fail("umount failed");
}
if (chroot("./newroot"))
fail("chroot failed");
if (chdir("/"))
fail("chdir failed");
// Drop CAP_SYS_PTRACE so that test processes can't attach to parent processes.
// Previously it lead to hangs because the loop process stopped due to SIGSTOP.
// Note that a process can always ptrace its direct children, which is enough
// for testing purposes.
struct __user_cap_header_struct cap_hdr = {};
struct __user_cap_data_struct cap_data[2] = {};
cap_hdr.version = _LINUX_CAPABILITY_VERSION_3;
cap_hdr.pid = getpid();
if (syscall(SYS_capget, &cap_hdr, &cap_data))
fail("capget failed");
cap_data[0].effective &= ~(1 << CAP_SYS_PTRACE);
cap_data[0].permitted &= ~(1 << CAP_SYS_PTRACE);
cap_data[0].inheritable &= ~(1 << CAP_SYS_PTRACE);
if (syscall(SYS_capset, &cap_hdr, &cap_data))
fail("capset failed");
loop();
doexit(1);
}
#define SYZ_HAVE_SANDBOX_NAMESPACE 1
static int do_sandbox_namespace(void)
{
int pid;
setup_common();
real_uid = getuid();
real_gid = getgid();
mprotect(sandbox_stack, 4096, PROT_NONE); // to catch stack underflows
pid = clone(namespace_sandbox_proc, &sandbox_stack[sizeof(sandbox_stack) - 64],
CLONE_NEWUSER | CLONE_NEWPID, 0);
return wait_for_loop(pid);
}
#endif
#if SYZ_EXECUTOR || SYZ_SANDBOX_ANDROID_UNTRUSTED_APP
#include <fcntl.h> // open(2)
#include <grp.h> // setgroups
#include <sys/xattr.h> // setxattr, getxattr
#define AID_NET_BT_ADMIN 3001
#define AID_NET_BT 3002
#define AID_INET 3003
#define AID_EVERYBODY 9997
#define AID_APP 10000
#define UNTRUSTED_APP_UID AID_APP + 999
#define UNTRUSTED_APP_GID AID_APP + 999
const char* SELINUX_CONTEXT_UNTRUSTED_APP = "u:r:untrusted_app:s0:c512,c768";
const char* SELINUX_LABEL_APP_DATA_FILE = "u:object_r:app_data_file:s0:c512,c768";
const char* SELINUX_CONTEXT_FILE = "/proc/thread-self/attr/current";
const char* SELINUX_XATTR_NAME = "security.selinux";
const gid_t UNTRUSTED_APP_GROUPS[] = {UNTRUSTED_APP_GID, AID_NET_BT_ADMIN, AID_NET_BT, AID_INET, AID_EVERYBODY};
const size_t UNTRUSTED_APP_NUM_GROUPS = sizeof(UNTRUSTED_APP_GROUPS) / sizeof(UNTRUSTED_APP_GROUPS[0]);
// Similar to libselinux getcon(3), but:
// - No library dependency
// - No dynamic memory allocation
// - Uses fail() instead of returning an error code
static void syz_getcon(char* context, size_t context_size)
{
int fd = open(SELINUX_CONTEXT_FILE, O_RDONLY);
if (fd < 0)
fail("getcon: Couldn't open %s", SELINUX_CONTEXT_FILE);
ssize_t nread = read(fd, context, context_size);
close(fd);
if (nread <= 0)
fail("getcon: Failed to read from %s", SELINUX_CONTEXT_FILE);
// The contents of the context file MAY end with a newline
// and MAY not have a null terminator. Handle this here.
if (context[nread - 1] == '\n')
context[nread - 1] = '\0';
}
// Similar to libselinux setcon(3), but:
// - No library dependency
// - No dynamic memory allocation
// - Uses fail() instead of returning an error code
static void syz_setcon(const char* context)
{
char new_context[512];
// Attempt to write the new context
int fd = open(SELINUX_CONTEXT_FILE, O_WRONLY);
if (fd < 0)
fail("setcon: Could not open %s", SELINUX_CONTEXT_FILE);
ssize_t bytes_written = write(fd, context, strlen(context));
// N.B.: We cannot reuse this file descriptor, since the target SELinux context
// may not be able to read from it.
close(fd);
if (bytes_written != (ssize_t)strlen(context))
fail("setcon: Could not write entire context. Wrote %zi, expected %zu", bytes_written, strlen(context));
// Validate the transition by checking the context
syz_getcon(new_context, sizeof(new_context));
if (strcmp(context, new_context) != 0)
fail("setcon: Failed to change to %s, context is %s", context, new_context);
}
// Similar to libselinux getfilecon(3), but:
// - No library dependency
// - No dynamic memory allocation
// - Uses fail() instead of returning an error code
static int syz_getfilecon(const char* path, char* context, size_t context_size)
{
int length = getxattr(path, SELINUX_XATTR_NAME, context, context_size);
if (length == -1)
fail("getfilecon: getxattr failed");
return length;
}
// Similar to libselinux setfilecon(3), but:
// - No library dependency
// - No dynamic memory allocation
// - Uses fail() instead of returning an error code
static void syz_setfilecon(const char* path, const char* context)
{
char new_context[512];
if (setxattr(path, SELINUX_XATTR_NAME, context, strlen(context) + 1, 0) != 0)
fail("setfilecon: setxattr failed");
if (syz_getfilecon(path, new_context, sizeof(new_context)) <= 0)
fail("setfilecon: getfilecon failed");
if (strcmp(context, new_context) != 0)
fail("setfilecon: could not set context to %s, currently %s", context, new_context);
}
#define SYZ_HAVE_SANDBOX_ANDROID_UNTRUSTED_APP 1
static int do_sandbox_android_untrusted_app(void)
{
setup_common();
sandbox_common();
if (chown(".", UNTRUSTED_APP_UID, UNTRUSTED_APP_UID) != 0)
fail("chmod failed");
if (setgroups(UNTRUSTED_APP_NUM_GROUPS, UNTRUSTED_APP_GROUPS) != 0)
fail("setgroups failed");
if (setresgid(UNTRUSTED_APP_GID, UNTRUSTED_APP_GID, UNTRUSTED_APP_GID) != 0)
fail("setresgid failed");
if (setresuid(UNTRUSTED_APP_UID, UNTRUSTED_APP_UID, UNTRUSTED_APP_UID) != 0)
fail("setresuid failed");
syz_setfilecon(".", SELINUX_LABEL_APP_DATA_FILE);
syz_setcon(SELINUX_CONTEXT_UNTRUSTED_APP);
#if SYZ_EXECUTOR || SYZ_TUN_ENABLE
initialize_tun();
#endif
#if SYZ_EXECUTOR || SYZ_ENABLE_NETDEV
initialize_netdevices();
#endif
loop();
doexit(1);
}
#endif
#if SYZ_EXECUTOR || SYZ_REPEAT && SYZ_USE_TMP_DIR
#include <dirent.h>
#include <errno.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mount.h>
#define FS_IOC_SETFLAGS _IOW('f', 2, long)
2017-09-22 09:09:53 +00:00
// One does not simply remove a directory.
// There can be mounts, so we need to try to umount.
// Moreover, a mount can be mounted several times, so we need to try to umount in a loop.
// Moreover, after umount a dir can become non-empty again, so we need another loop.
// Moreover, a mount can be re-mounted as read-only and then we will fail to make a dir empty.
static void remove_dir(const char* dir)
{
DIR* dp;
struct dirent* ep;
int iter = 0;
retry:
while (umount2(dir, MNT_DETACH) == 0) {
debug("umount(%s)\n", dir);
}
2017-09-22 09:09:53 +00:00
dp = opendir(dir);
if (dp == NULL) {
if (errno == EMFILE) {
// This happens when the test process casts prlimit(NOFILE) on us.
// Ideally we somehow prevent test processes from messing with parent processes.
// But full sandboxing is expensive, so let's ignore this error for now.
exitf("opendir(%s) failed due to NOFILE, exiting", dir);
2017-09-22 09:09:53 +00:00
}
exitf("opendir(%s) failed", dir);
}
while ((ep = readdir(dp))) {
if (strcmp(ep->d_name, ".") == 0 || strcmp(ep->d_name, "..") == 0)
continue;
char filename[FILENAME_MAX];
snprintf(filename, sizeof(filename), "%s/%s", dir, ep->d_name);
// If it's 9p mount with broken transport, lstat will fail.
// So try to umount first.
while (umount2(filename, MNT_DETACH) == 0) {
debug("umount(%s)\n", filename);
}
2017-09-22 09:09:53 +00:00
struct stat st;
if (lstat(filename, &st))
exitf("lstat(%s) failed", filename);
if (S_ISDIR(st.st_mode)) {
remove_dir(filename);
continue;
}
int i;
for (i = 0;; i++) {
if (unlink(filename) == 0)
break;
if (errno == EPERM) {
// Try to reset FS_XFLAG_IMMUTABLE.
int fd = open(filename, O_RDONLY);
if (fd != -1) {
long flags = 0;
if (ioctl(fd, FS_IOC_SETFLAGS, &flags) == 0)
debug("reset FS_XFLAG_IMMUTABLE\n");
close(fd);
continue;
}
}
2017-09-22 09:09:53 +00:00
if (errno == EROFS) {
debug("ignoring EROFS\n");
break;
}
if (errno != EBUSY || i > 100)
exitf("unlink(%s) failed", filename);
debug("umount(%s)\n", filename);
if (umount2(filename, MNT_DETACH))
exitf("umount(%s) failed", filename);
}
}
closedir(dp);
int i;
for (i = 0;; i++) {
if (rmdir(dir) == 0)
break;
if (i < 100) {
if (errno == EPERM) {
// Try to reset FS_XFLAG_IMMUTABLE.
int fd = open(dir, O_RDONLY);
if (fd != -1) {
long flags = 0;
if (ioctl(fd, FS_IOC_SETFLAGS, &flags) == 0)
debug("reset FS_XFLAG_IMMUTABLE\n");
close(fd);
continue;
}
}
2017-09-22 09:09:53 +00:00
if (errno == EROFS) {
debug("ignoring EROFS\n");
break;
}
if (errno == EBUSY) {
debug("umount(%s)\n", dir);
if (umount2(dir, MNT_DETACH))
exitf("umount(%s) failed", dir);
continue;
}
if (errno == ENOTEMPTY) {
if (iter < 100) {
iter++;
goto retry;
}
}
}
exitf("rmdir(%s) failed", dir);
}
}
#endif
#if SYZ_EXECUTOR || SYZ_FAULT_INJECTION
#include <fcntl.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
2017-09-22 09:09:53 +00:00
static int inject_fault(int nth)
{
int fd;
char buf[16];
2017-09-22 09:09:53 +00:00
fd = open("/proc/thread-self/fail-nth", O_RDWR);
// We treat errors here as temporal/non-critical because we see
// occasional ENOENT/EACCES errors returned. It seems that fuzzer
// somehow gets its hands to it.
2017-09-22 09:09:53 +00:00
if (fd == -1)
exitf("failed to open /proc/thread-self/fail-nth");
2017-09-22 09:09:53 +00:00
sprintf(buf, "%d", nth + 1);
if (write(fd, buf, strlen(buf)) != (ssize_t)strlen(buf))
exitf("failed to write /proc/thread-self/fail-nth");
2017-09-22 09:09:53 +00:00
return fd;
}
#endif
#if SYZ_EXECUTOR
2017-09-22 09:09:53 +00:00
static int fault_injected(int fail_fd)
{
char buf[16];
int n = read(fail_fd, buf, sizeof(buf) - 1);
if (n <= 0)
exitf("failed to read /proc/thread-self/fail-nth");
2017-09-22 09:09:53 +00:00
int res = n == 2 && buf[0] == '0' && buf[1] == '\n';
buf[0] = '0';
if (write(fail_fd, buf, 1) != 1)
exitf("failed to write /proc/thread-self/fail-nth");
2017-09-22 09:09:53 +00:00
close(fail_fd);
return res;
}
#endif
#if SYZ_EXECUTOR || SYZ_REPEAT
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
static void kill_and_wait(int pid, int* status)
{
kill(-pid, SIGKILL);
kill(pid, SIGKILL);
int i;
// First, give it up to 100 ms to surrender.
for (i = 0; i < 100; i++) {
if (waitpid(-1, status, WNOHANG | __WALL) == pid)
return;
usleep(1000);
}
// Now, try to abort fuse connections as they cause deadlocks,
// see Documentation/filesystems/fuse.txt for details.
// There is no good way to figure out the right connections
// provided that the process could use unshare(CLONE_NEWNS),
// so we abort all.
debug("kill is not working\n");
DIR* dir = opendir("/sys/fs/fuse/connections");
if (dir) {
for (;;) {
struct dirent* ent = readdir(dir);
if (!ent)
break;
if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0)
continue;
char abort[300];
snprintf(abort, sizeof(abort), "/sys/fs/fuse/connections/%s/abort", ent->d_name);
int fd = open(abort, O_WRONLY);
if (fd == -1) {
debug("failed to open %s: %d\n", abort, errno);
continue;
}
debug("aborting fuse conn %s\n", ent->d_name);
if (write(fd, abort, 1) < 0) {
debug("failed to abort: %d\n", errno);
}
close(fd);
}
closedir(dir);
} else {
debug("failed to open /sys/fs/fuse/connections: %d\n", errno);
}
// Now, just wait, no other options.
while (waitpid(-1, status, __WALL) != pid) {
}
}
#endif
#if SYZ_EXECUTOR || SYZ_REPEAT && (SYZ_ENABLE_CGROUPS || SYZ_RESET_NET_NAMESPACE)
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
2017-09-22 09:09:53 +00:00
#define SYZ_HAVE_SETUP_LOOP 1
static void setup_loop()
2017-09-22 09:09:53 +00:00
{
#if SYZ_EXECUTOR || SYZ_ENABLE_CGROUPS
int pid = getpid();
2018-03-22 12:24:02 +00:00
char cgroupdir[64];
char file[128];
2018-03-22 12:24:02 +00:00
snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/unified/syz%llu", procid);
if (mkdir(cgroupdir, 0777)) {
debug("mkdir(%s) failed: %d\n", cgroupdir, errno);
}
// Restrict number of pids per test process to prevent fork bombs.
// We have up to 16 threads + main process + loop.
// 32 pids should be enough for everyone.
snprintf(file, sizeof(file), "%s/pids.max", cgroupdir);
if (!write_file(file, "32")) {
debug("write(%s) failed: %d\n", file, errno);
}
// Restrict memory consumption.
// We have some syscalls that inherently consume lots of memory,
// e.g. mounting some filesystem images requires at least 128MB
// image in memory. We restrict RLIMIT_AS to 200MB. Here we gradually
// increase low/high/max limits to make things more interesting.
// Also this takes into account KASAN quarantine size.
// If the limit is lower than KASAN quarantine size, then it can happen
// so that we kill the process, but all of its memory is in quarantine
// and is still accounted against memcg. As the result memcg won't
// allow to allocate any memory in the parent and in the new test process.
// The current limit of 300MB supports up to 9.6GB RAM (quarantine is 1/32).
snprintf(file, sizeof(file), "%s/memory.low", cgroupdir);
if (!write_file(file, "%d", 298 << 20)) {
debug("write(%s) failed: %d\n", file, errno);
}
snprintf(file, sizeof(file), "%s/memory.high", cgroupdir);
if (!write_file(file, "%d", 299 << 20)) {
debug("write(%s) failed: %d\n", file, errno);
}
snprintf(file, sizeof(file), "%s/memory.max", cgroupdir);
if (!write_file(file, "%d", 300 << 20)) {
debug("write(%s) failed: %d\n", file, errno);
}
// Setup some v1 groups to make things more interesting.
snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir);
if (!write_file(file, "%d", pid)) {
debug("write(%s) failed: %d\n", file, errno);
}
snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/cpu/syz%llu", procid);
if (mkdir(cgroupdir, 0777)) {
debug("mkdir(%s) failed: %d\n", cgroupdir, errno);
}
snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir);
if (!write_file(file, "%d", pid)) {
debug("write(%s) failed: %d\n", file, errno);
}
snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/net/syz%llu", procid);
if (mkdir(cgroupdir, 0777)) {
debug("mkdir(%s) failed: %d\n", cgroupdir, errno);
}
snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir);
if (!write_file(file, "%d", pid)) {
debug("write(%s) failed: %d\n", file, errno);
}
2018-03-22 12:24:02 +00:00
#endif
#if SYZ_EXECUTOR || SYZ_RESET_NET_NAMESPACE
checkpoint_net_namespace();
#endif
2017-09-22 09:09:53 +00:00
}
#endif
#if SYZ_EXECUTOR || SYZ_REPEAT && (SYZ_RESET_NET_NAMESPACE || __NR_syz_mount_image || __NR_syz_read_part_table)
#define SYZ_HAVE_RESET_LOOP 1
static void reset_loop()
2017-09-22 09:09:53 +00:00
{
#if SYZ_EXECUTOR || __NR_syz_mount_image || __NR_syz_read_part_table
char buf[64];
snprintf(buf, sizeof(buf), "/dev/loop%llu", procid);
int loopfd = open(buf, O_RDWR);
if (loopfd != -1) {
ioctl(loopfd, LOOP_CLR_FD, 0);
close(loopfd);
2017-09-22 09:09:53 +00:00
}
#endif
#if SYZ_EXECUTOR || SYZ_RESET_NET_NAMESPACE
reset_net_namespace();
#endif
}
#endif
#if SYZ_EXECUTOR || SYZ_REPEAT
#include <sys/prctl.h>
#define SYZ_HAVE_SETUP_TEST 1
static void setup_test()
{
prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
setpgrp();
#if SYZ_EXECUTOR || SYZ_ENABLE_CGROUPS
char cgroupdir[64];
snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/unified/syz%llu", procid);
if (symlink(cgroupdir, "./cgroup")) {
debug("symlink(%s, ./cgroup) failed: %d\n", cgroupdir, errno);
}
snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/cpu/syz%llu", procid);
if (symlink(cgroupdir, "./cgroup.cpu")) {
debug("symlink(%s, ./cgroup.cpu) failed: %d\n", cgroupdir, errno);
}
snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/net/syz%llu", procid);
if (symlink(cgroupdir, "./cgroup.net")) {
debug("symlink(%s, ./cgroup.net) failed: %d\n", cgroupdir, errno);
}
// It's the leaf test process we want to be always killed first.
if (!write_file("/proc/self/oom_score_adj", "1000")) {
debug("write(oom_score_adj) failed: %d\n", errno);
}
#endif
#if SYZ_EXECUTOR || SYZ_TUN_ENABLE
// Read all remaining packets from tun to better
// isolate consequently executing programs.
flush_tun();
#endif
}
#define SYZ_HAVE_RESET_TEST 1
static void reset_test()
{
// Keeping a 9p transport pipe open will hang the proccess dead,
// so close all opened file descriptors.
int fd;
for (fd = 3; fd < 30; fd++)
close(fd);
}
#endif