syzkaller/executor/common_linux.h
Dmitry Vyukov fca6a74d0c executor: connect virt_wifi to veth
virt_wifi docs say that the enslaved device won't be usable
on itself. It's probably not a good idea to make lo unusable.
Enslave a dedicated veth instead.
2020-01-03 16:11:49 +01:00

3068 lines
90 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Copyright 2016 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
// This file is shared between executor and csource package.
#include <stdlib.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#if SYZ_EXECUTOR
const int kExtraCoverSize = 256 << 10;
struct cover_t;
static void cover_reset(cover_t* cov);
#endif
#if SYZ_EXECUTOR || SYZ_THREADED
#include <linux/futex.h>
#include <pthread.h>
typedef struct {
int state;
} event_t;
static void event_init(event_t* ev)
{
ev->state = 0;
}
static void event_reset(event_t* ev)
{
ev->state = 0;
}
static void event_set(event_t* ev)
{
if (ev->state)
fail("event already set");
__atomic_store_n(&ev->state, 1, __ATOMIC_RELEASE);
syscall(SYS_futex, &ev->state, FUTEX_WAKE | FUTEX_PRIVATE_FLAG, 1000000);
}
static void event_wait(event_t* ev)
{
while (!__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE))
syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, 0);
}
static int event_isset(event_t* ev)
{
return __atomic_load_n(&ev->state, __ATOMIC_ACQUIRE);
}
static int event_timedwait(event_t* ev, uint64 timeout)
{
uint64 start = current_time_ms();
uint64 now = start;
for (;;) {
uint64 remain = timeout - (now - start);
struct timespec ts;
ts.tv_sec = remain / 1000;
ts.tv_nsec = (remain % 1000) * 1000 * 1000;
syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, &ts);
if (__atomic_load_n(&ev->state, __ATOMIC_RELAXED))
return 1;
now = current_time_ms();
if (now - start > timeout)
return 0;
}
}
#endif
#if SYZ_EXECUTOR || SYZ_REPEAT || SYZ_NET_INJECTION || SYZ_FAULT || SYZ_SANDBOX_NONE || \
SYZ_SANDBOX_SETUID || SYZ_SANDBOX_NAMESPACE || SYZ_SANDBOX_ANDROID || \
SYZ_FAULT || SYZ_LEAK || SYZ_BINFMT_MISC || (__NR_syz_usb_connect && USB_DEBUG)
#include <errno.h>
#include <fcntl.h>
#include <stdarg.h>
#include <stdbool.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
static bool write_file(const char* file, const char* what, ...)
{
char buf[1024];
va_list args;
va_start(args, what);
vsnprintf(buf, sizeof(buf), what, args);
va_end(args);
buf[sizeof(buf) - 1] = 0;
int len = strlen(buf);
int fd = open(file, O_WRONLY | O_CLOEXEC);
if (fd == -1)
return false;
if (write(fd, buf, len) != len) {
int err = errno;
close(fd);
debug("write(%s) failed: %d\n", file, err);
errno = err;
return false;
}
close(fd);
return true;
}
#endif
#if SYZ_EXECUTOR || SYZ_NET_DEVICES || SYZ_NET_INJECTION || SYZ_DEVLINK_PCI
#include <arpa/inet.h>
#include <net/if.h>
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <linux/if_addr.h>
#include <linux/if_link.h>
#include <linux/in6.h>
#include <linux/neighbour.h>
#include <linux/net.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/veth.h>
struct nlmsg {
char* pos;
int nesting;
struct nlattr* nested[8];
char buf[1024];
};
static struct nlmsg nlmsg;
static void netlink_init(struct nlmsg* nlmsg, int typ, int flags,
const void* data, int size)
{
memset(nlmsg, 0, sizeof(*nlmsg));
struct nlmsghdr* hdr = (struct nlmsghdr*)nlmsg->buf;
hdr->nlmsg_type = typ;
hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
memcpy(hdr + 1, data, size);
nlmsg->pos = (char*)(hdr + 1) + NLMSG_ALIGN(size);
}
static void netlink_attr(struct nlmsg* nlmsg, int typ,
const void* data, int size)
{
struct nlattr* attr = (struct nlattr*)nlmsg->pos;
attr->nla_len = sizeof(*attr) + size;
attr->nla_type = typ;
memcpy(attr + 1, data, size);
nlmsg->pos += NLMSG_ALIGN(attr->nla_len);
}
#if SYZ_EXECUTOR || SYZ_NET_DEVICES
static void netlink_nest(struct nlmsg* nlmsg, int typ)
{
struct nlattr* attr = (struct nlattr*)nlmsg->pos;
attr->nla_type = typ;
nlmsg->pos += sizeof(*attr);
nlmsg->nested[nlmsg->nesting++] = attr;
}
static void netlink_done(struct nlmsg* nlmsg)
{
struct nlattr* attr = nlmsg->nested[--nlmsg->nesting];
attr->nla_len = nlmsg->pos - (char*)attr;
}
#endif
static int netlink_send_ext(struct nlmsg* nlmsg, int sock,
uint16 reply_type, int* reply_len)
{
if (nlmsg->pos > nlmsg->buf + sizeof(nlmsg->buf) || nlmsg->nesting)
fail("nlmsg overflow/bad nesting");
struct nlmsghdr* hdr = (struct nlmsghdr*)nlmsg->buf;
hdr->nlmsg_len = nlmsg->pos - nlmsg->buf;
struct sockaddr_nl addr;
memset(&addr, 0, sizeof(addr));
addr.nl_family = AF_NETLINK;
unsigned n = sendto(sock, nlmsg->buf, hdr->nlmsg_len, 0, (struct sockaddr*)&addr, sizeof(addr));
if (n != hdr->nlmsg_len)
fail("short netlink write: %d/%d", n, hdr->nlmsg_len);
n = recv(sock, nlmsg->buf, sizeof(nlmsg->buf), 0);
if (hdr->nlmsg_type == NLMSG_DONE) {
*reply_len = 0;
return 0;
}
if (n < sizeof(struct nlmsghdr))
fail("short netlink read: %d", n);
if (reply_len && hdr->nlmsg_type == reply_type) {
*reply_len = n;
return 0;
}
if (n < sizeof(struct nlmsghdr) + sizeof(struct nlmsgerr))
fail("short netlink read: %d", n);
if (hdr->nlmsg_type != NLMSG_ERROR)
fail("short netlink ack: %d", hdr->nlmsg_type);
return -((struct nlmsgerr*)(hdr + 1))->error;
}
static int netlink_send(struct nlmsg* nlmsg, int sock)
{
return netlink_send_ext(nlmsg, sock, 0, NULL);
}
#if SYZ_EXECUTOR || SYZ_NET_DEVICES || SYZ_DEVLINK_PCI
static int netlink_next_msg(struct nlmsg* nlmsg, unsigned int offset,
unsigned int total_len)
{
struct nlmsghdr* hdr = (struct nlmsghdr*)(nlmsg->buf + offset);
if (offset == total_len || offset + hdr->nlmsg_len > total_len)
return -1;
return hdr->nlmsg_len;
}
#endif
#if SYZ_EXECUTOR || SYZ_NET_DEVICES
static void netlink_add_device_impl(struct nlmsg* nlmsg, const char* type,
const char* name)
{
struct ifinfomsg hdr;
memset(&hdr, 0, sizeof(hdr));
netlink_init(nlmsg, RTM_NEWLINK, NLM_F_EXCL | NLM_F_CREATE, &hdr, sizeof(hdr));
if (name)
netlink_attr(nlmsg, IFLA_IFNAME, name, strlen(name));
netlink_nest(nlmsg, IFLA_LINKINFO);
netlink_attr(nlmsg, IFLA_INFO_KIND, type, strlen(type));
}
static void netlink_add_device(struct nlmsg* nlmsg, int sock, const char* type,
const char* name)
{
netlink_add_device_impl(nlmsg, type, name);
netlink_done(nlmsg);
int err = netlink_send(nlmsg, sock);
debug("netlink: adding device %s type %s: %s\n", name, type, strerror(err));
(void)err;
}
static void netlink_add_veth(struct nlmsg* nlmsg, int sock, const char* name,
const char* peer)
{
netlink_add_device_impl(nlmsg, "veth", name);
netlink_nest(nlmsg, IFLA_INFO_DATA);
netlink_nest(nlmsg, VETH_INFO_PEER);
nlmsg->pos += sizeof(struct ifinfomsg);
netlink_attr(nlmsg, IFLA_IFNAME, peer, strlen(peer));
netlink_done(nlmsg);
netlink_done(nlmsg);
netlink_done(nlmsg);
int err = netlink_send(nlmsg, sock);
debug("netlink: adding device %s type veth peer %s: %s\n", name, peer, strerror(err));
(void)err;
}
static void netlink_add_hsr(struct nlmsg* nlmsg, int sock, const char* name,
const char* slave1, const char* slave2)
{
netlink_add_device_impl(nlmsg, "hsr", name);
netlink_nest(nlmsg, IFLA_INFO_DATA);
int ifindex1 = if_nametoindex(slave1);
netlink_attr(nlmsg, IFLA_HSR_SLAVE1, &ifindex1, sizeof(ifindex1));
int ifindex2 = if_nametoindex(slave2);
netlink_attr(nlmsg, IFLA_HSR_SLAVE2, &ifindex2, sizeof(ifindex2));
netlink_done(nlmsg);
netlink_done(nlmsg);
int err = netlink_send(nlmsg, sock);
debug("netlink: adding device %s type hsr slave1 %s slave2 %s: %s\n",
name, slave1, slave2, strerror(err));
(void)err;
}
static void netlink_add_virt_wifi(struct nlmsg* nlmsg, int sock, const char* name, const char* link)
{
netlink_add_device_impl(nlmsg, "virt_wifi", name);
netlink_done(nlmsg);
int ifindex = if_nametoindex(link);
netlink_attr(nlmsg, IFLA_LINK, &ifindex, sizeof(ifindex));
int err = netlink_send(nlmsg, sock);
debug("netlink: adding device %s type virt_wifi link %s: %s\n",
name, link, strerror(err));
(void)err;
}
#endif
#if SYZ_EXECUTOR || SYZ_NET_DEVICES || SYZ_NET_INJECTION || SYZ_DEVLINK_PCI
static void netlink_device_change(struct nlmsg* nlmsg, int sock, const char* name, bool up,
const char* master, const void* mac, int macsize,
const char* new_name)
{
struct ifinfomsg hdr;
memset(&hdr, 0, sizeof(hdr));
if (up)
hdr.ifi_flags = hdr.ifi_change = IFF_UP;
hdr.ifi_index = if_nametoindex(name);
netlink_init(nlmsg, RTM_NEWLINK, 0, &hdr, sizeof(hdr));
if (new_name)
netlink_attr(nlmsg, IFLA_IFNAME, new_name, strlen(new_name));
if (master) {
int ifindex = if_nametoindex(master);
netlink_attr(nlmsg, IFLA_MASTER, &ifindex, sizeof(ifindex));
}
if (macsize)
netlink_attr(nlmsg, IFLA_ADDRESS, mac, macsize);
int err = netlink_send(nlmsg, sock);
debug("netlink: device %s up master %s: %s\n", name, master, strerror(err));
(void)err;
}
#endif
#if SYZ_EXECUTOR || SYZ_NET_DEVICES || SYZ_NET_INJECTION
static int netlink_add_addr(struct nlmsg* nlmsg, int sock, const char* dev,
const void* addr, int addrsize)
{
struct ifaddrmsg hdr;
memset(&hdr, 0, sizeof(hdr));
hdr.ifa_family = addrsize == 4 ? AF_INET : AF_INET6;
hdr.ifa_prefixlen = addrsize == 4 ? 24 : 120;
hdr.ifa_scope = RT_SCOPE_UNIVERSE;
hdr.ifa_index = if_nametoindex(dev);
netlink_init(nlmsg, RTM_NEWADDR, NLM_F_CREATE | NLM_F_REPLACE, &hdr, sizeof(hdr));
netlink_attr(nlmsg, IFA_LOCAL, addr, addrsize);
netlink_attr(nlmsg, IFA_ADDRESS, addr, addrsize);
return netlink_send(nlmsg, sock);
}
static void netlink_add_addr4(struct nlmsg* nlmsg, int sock,
const char* dev, const char* addr)
{
struct in_addr in_addr;
inet_pton(AF_INET, addr, &in_addr);
int err = netlink_add_addr(nlmsg, sock, dev, &in_addr, sizeof(in_addr));
debug("netlink: add addr %s dev %s: %s\n", addr, dev, strerror(err));
(void)err;
}
static void netlink_add_addr6(struct nlmsg* nlmsg, int sock,
const char* dev, const char* addr)
{
struct in6_addr in6_addr;
inet_pton(AF_INET6, addr, &in6_addr);
int err = netlink_add_addr(nlmsg, sock, dev, &in6_addr, sizeof(in6_addr));
debug("netlink: add addr %s dev %s: %s\n", addr, dev, strerror(err));
(void)err;
}
#endif
#if SYZ_EXECUTOR || SYZ_NET_INJECTION
static void netlink_add_neigh(struct nlmsg* nlmsg, int sock, const char* name,
const void* addr, int addrsize, const void* mac, int macsize)
{
struct ndmsg hdr;
memset(&hdr, 0, sizeof(hdr));
hdr.ndm_family = addrsize == 4 ? AF_INET : AF_INET6;
hdr.ndm_ifindex = if_nametoindex(name);
hdr.ndm_state = NUD_PERMANENT;
netlink_init(nlmsg, RTM_NEWNEIGH, NLM_F_EXCL | NLM_F_CREATE, &hdr, sizeof(hdr));
netlink_attr(nlmsg, NDA_DST, addr, addrsize);
netlink_attr(nlmsg, NDA_LLADDR, mac, macsize);
int err = netlink_send(nlmsg, sock);
debug("netlink: add neigh %s addr %d lladdr %d: %s\n",
name, addrsize, macsize, strerror(err));
(void)err;
}
#endif
#endif
#if SYZ_EXECUTOR || SYZ_NET_INJECTION
#include <arpa/inet.h>
#include <errno.h>
#include <fcntl.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <stdarg.h>
#include <stdbool.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <linux/if_ether.h>
#include <linux/if_tun.h>
#include <linux/ip.h>
#include <linux/tcp.h>
static int tunfd = -1;
static int tun_frags_enabled;
#define TUN_IFACE "syz_tun"
#define LOCAL_MAC 0xaaaaaaaaaaaa
#define REMOTE_MAC 0xaaaaaaaaaabb
#define LOCAL_IPV4 "172.20.20.170"
#define REMOTE_IPV4 "172.20.20.187"
#define LOCAL_IPV6 "fe80::aa"
#define REMOTE_IPV6 "fe80::bb"
#ifndef IFF_NAPI
#define IFF_NAPI 0x0010
#endif
#ifndef IFF_NAPI_FRAGS
#define IFF_NAPI_FRAGS 0x0020
#endif
static void initialize_tun(void)
{
#if SYZ_EXECUTOR
if (!flag_net_injection)
return;
#endif
tunfd = open("/dev/net/tun", O_RDWR | O_NONBLOCK);
if (tunfd == -1) {
#if SYZ_EXECUTOR
fail("tun: can't open /dev/net/tun");
#else
printf("tun: can't open /dev/net/tun: please enable CONFIG_TUN=y\n");
printf("otherwise fuzzing or reproducing might not work as intended\n");
return;
#endif
}
// Remap tun onto higher fd number to hide it from fuzzer and to keep
// fd numbers stable regardless of whether tun is opened or not (also see kMaxFd).
const int kTunFd = 240;
if (dup2(tunfd, kTunFd) < 0)
fail("dup2(tunfd, kTunFd) failed");
close(tunfd);
tunfd = kTunFd;
struct ifreq ifr;
memset(&ifr, 0, sizeof(ifr));
strncpy(ifr.ifr_name, TUN_IFACE, IFNAMSIZ);
ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_NAPI | IFF_NAPI_FRAGS;
if (ioctl(tunfd, TUNSETIFF, (void*)&ifr) < 0) {
// IFF_NAPI_FRAGS requires root, so try without it.
ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
if (ioctl(tunfd, TUNSETIFF, (void*)&ifr) < 0)
fail("tun: ioctl(TUNSETIFF) failed");
}
// If IFF_NAPI_FRAGS is not supported it will be silently dropped,
// so query the effective flags.
if (ioctl(tunfd, TUNGETIFF, (void*)&ifr) < 0)
fail("tun: ioctl(TUNGETIFF) failed");
tun_frags_enabled = (ifr.ifr_flags & IFF_NAPI_FRAGS) != 0;
debug("tun_frags_enabled=%d\n", tun_frags_enabled);
// Disable IPv6 DAD, otherwise the address remains unusable until DAD completes.
// Don't panic because this is an optional config.
char sysctl[64];
sprintf(sysctl, "/proc/sys/net/ipv6/conf/%s/accept_dad", TUN_IFACE);
write_file(sysctl, "0");
// Disable IPv6 router solicitation to prevent IPv6 spam.
// Don't panic because this is an optional config.
sprintf(sysctl, "/proc/sys/net/ipv6/conf/%s/router_solicitations", TUN_IFACE);
write_file(sysctl, "0");
// There seems to be no way to disable IPv6 MTD to prevent more IPv6 spam.
int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
if (sock == -1)
fail("socket(AF_NETLINK) failed");
netlink_add_addr4(&nlmsg, sock, TUN_IFACE, LOCAL_IPV4);
netlink_add_addr6(&nlmsg, sock, TUN_IFACE, LOCAL_IPV6);
uint64 macaddr = REMOTE_MAC;
struct in_addr in_addr;
inet_pton(AF_INET, REMOTE_IPV4, &in_addr);
netlink_add_neigh(&nlmsg, sock, TUN_IFACE, &in_addr, sizeof(in_addr), &macaddr, ETH_ALEN);
struct in6_addr in6_addr;
inet_pton(AF_INET6, REMOTE_IPV6, &in6_addr);
netlink_add_neigh(&nlmsg, sock, TUN_IFACE, &in6_addr, sizeof(in6_addr), &macaddr, ETH_ALEN);
macaddr = LOCAL_MAC;
netlink_device_change(&nlmsg, sock, TUN_IFACE, true, 0, &macaddr, ETH_ALEN, NULL);
close(sock);
}
#endif
#if SYZ_EXECUTOR || __NR_syz_init_net_socket || SYZ_DEVLINK_PCI
const int kInitNetNsFd = 239; // see kMaxFd
#endif
#if SYZ_EXECUTOR || SYZ_DEVLINK_PCI || SYZ_NET_DEVICES
#include <linux/genetlink.h>
#include <stdbool.h>
#define DEVLINK_FAMILY_NAME "devlink"
#define DEVLINK_CMD_PORT_GET 5
#if SYZ_EXECUTOR || SYZ_DEVLINK_PCI
#define DEVLINK_CMD_RELOAD 37
#endif
#define DEVLINK_ATTR_BUS_NAME 1
#define DEVLINK_ATTR_DEV_NAME 2
#define DEVLINK_ATTR_NETDEV_NAME 7
#if SYZ_EXECUTOR || SYZ_DEVLINK_PCI
#define DEVLINK_ATTR_NETNS_FD 138
#endif
static int netlink_devlink_id_get(struct nlmsg* nlmsg, int sock)
{
struct genlmsghdr genlhdr;
struct nlattr* attr;
int err, n;
uint16 id = 0;
memset(&genlhdr, 0, sizeof(genlhdr));
genlhdr.cmd = CTRL_CMD_GETFAMILY;
netlink_init(nlmsg, GENL_ID_CTRL, 0, &genlhdr, sizeof(genlhdr));
netlink_attr(nlmsg, CTRL_ATTR_FAMILY_NAME, DEVLINK_FAMILY_NAME, strlen(DEVLINK_FAMILY_NAME) + 1);
err = netlink_send_ext(nlmsg, sock, GENL_ID_CTRL, &n);
if (err) {
debug("netlink: failed to get devlink family id: %s\n", strerror(err));
return -1;
}
attr = (struct nlattr*)(nlmsg->buf + NLMSG_HDRLEN + NLMSG_ALIGN(sizeof(genlhdr)));
for (; (char*)attr < nlmsg->buf + n; attr = (struct nlattr*)((char*)attr + NLMSG_ALIGN(attr->nla_len))) {
if (attr->nla_type == CTRL_ATTR_FAMILY_ID) {
id = *(uint16*)(attr + 1);
break;
}
}
if (!id) {
debug("netlink: failed to parse message for devlink family id\n");
return -1;
}
recv(sock, nlmsg->buf, sizeof(nlmsg->buf), 0); /* recv ack */
return id;
}
#if SYZ_EXECUTOR || SYZ_DEVLINK_PCI
static void netlink_devlink_netns_move(const char* bus_name, const char* dev_name, int netns_fd)
{
struct genlmsghdr genlhdr;
int sock;
int id, err;
sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
if (sock == -1)
fail("socket(AF_NETLINK) failed\n");
id = netlink_devlink_id_get(&nlmsg, sock);
if (id == -1)
goto error;
memset(&genlhdr, 0, sizeof(genlhdr));
genlhdr.cmd = DEVLINK_CMD_RELOAD;
netlink_init(&nlmsg, id, 0, &genlhdr, sizeof(genlhdr));
netlink_attr(&nlmsg, DEVLINK_ATTR_BUS_NAME, bus_name, strlen(bus_name) + 1);
netlink_attr(&nlmsg, DEVLINK_ATTR_DEV_NAME, dev_name, strlen(dev_name) + 1);
netlink_attr(&nlmsg, DEVLINK_ATTR_NETNS_FD, &netns_fd, sizeof(netns_fd));
err = netlink_send(&nlmsg, sock);
if (err) {
debug("netlink: failed to move devlink instance %s/%s into network namespace: %s\n",
bus_name, dev_name, strerror(err));
}
error:
close(sock);
}
#endif
static struct nlmsg nlmsg2;
static void initialize_devlink_ports(const char* bus_name, const char* dev_name,
const char* netdev_prefix)
{
struct genlmsghdr genlhdr;
int len, total_len, id, err, offset;
uint16 netdev_index;
int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
if (sock == -1)
fail("socket(AF_NETLINK) failed\n");
int rtsock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
if (rtsock == -1)
fail("socket(AF_NETLINK) failed");
id = netlink_devlink_id_get(&nlmsg, sock);
if (id == -1)
goto error;
memset(&genlhdr, 0, sizeof(genlhdr));
genlhdr.cmd = DEVLINK_CMD_PORT_GET;
netlink_init(&nlmsg, id, NLM_F_DUMP, &genlhdr, sizeof(genlhdr));
netlink_attr(&nlmsg, DEVLINK_ATTR_BUS_NAME, bus_name, strlen(bus_name) + 1);
netlink_attr(&nlmsg, DEVLINK_ATTR_DEV_NAME, dev_name, strlen(dev_name) + 1);
err = netlink_send_ext(&nlmsg, sock, id, &total_len);
if (err) {
debug("netlink: failed to get port get reply: %s\n", strerror(err));
goto error;
}
offset = 0;
netdev_index = 0;
while ((len = netlink_next_msg(&nlmsg, offset, total_len)) != -1) {
struct nlattr* attr = (struct nlattr*)(nlmsg.buf + offset + NLMSG_HDRLEN + NLMSG_ALIGN(sizeof(genlhdr)));
for (; (char*)attr < nlmsg.buf + offset + len; attr = (struct nlattr*)((char*)attr + NLMSG_ALIGN(attr->nla_len))) {
if (attr->nla_type == DEVLINK_ATTR_NETDEV_NAME) {
char* port_name;
char netdev_name[IFNAMSIZ];
port_name = (char*)(attr + 1);
snprintf(netdev_name, sizeof(netdev_name), "%s%d", netdev_prefix, netdev_index);
netlink_device_change(&nlmsg2, rtsock, port_name, true, 0, 0, 0, netdev_name);
break;
}
}
offset += len;
netdev_index++;
}
error:
close(rtsock);
close(sock);
}
#if SYZ_EXECUTOR || SYZ_DEVLINK_PCI
#include <fcntl.h>
static void initialize_devlink_pci(void)
{
#if SYZ_EXECUTOR
if (!flag_devlink_pci)
return;
#endif
int netns = open("/proc/self/ns/net", O_RDONLY);
if (netns == -1)
fail("open(/proc/self/ns/net) failed");
int ret = setns(kInitNetNsFd, 0);
if (ret == -1)
fail("set_ns(init_netns_fd) failed");
netlink_devlink_netns_move("pci", "0000:00:10.0", netns);
ret = setns(netns, 0);
if (ret == -1)
fail("set_ns(this_netns_fd) failed");
close(netns);
initialize_devlink_ports("pci", "0000:00:10.0", "netpci");
}
#endif
#endif
#if SYZ_EXECUTOR || SYZ_NET_DEVICES
#include <arpa/inet.h>
#include <errno.h>
#include <fcntl.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <stdarg.h>
#include <stdbool.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/uio.h>
#include <linux/if_ether.h>
#include <linux/if_tun.h>
#include <linux/ip.h>
#include <linux/tcp.h>
// Addresses are chosen to be in the same subnet as tun addresses.
#define DEV_IPV4 "172.20.20.%d"
#define DEV_IPV6 "fe80::%02x"
#define DEV_MAC 0x00aaaaaaaaaa
static void netdevsim_add(unsigned int addr, unsigned int port_count)
{
char buf[16];
sprintf(buf, "%u %u", addr, port_count);
if (write_file("/sys/bus/netdevsim/new_device", buf)) {
snprintf(buf, sizeof(buf), "netdevsim%d", addr);
initialize_devlink_ports("netdevsim", buf, "netdevsim");
}
}
// We test in a separate namespace, which does not have any network devices initially (even lo).
// Create/up as many as we can.
static void initialize_netdevices(void)
{
#if SYZ_EXECUTOR
if (!flag_net_devices)
return;
#endif
// TODO: add the following devices:
// - vlan
// - vxlan
// - macvlan
// - ipvlan
// - macsec
// - ipip
// - lowpan
// - ipoib
// - geneve
// - vrf
// - rmnet
// - openvswitch
// Naive attempts to add devices of these types fail with various errors.
// Also init namespace contains the following devices (which presumably can't be
// created in non-init namespace), can we use them somehow?
// - ifb0/1
// - wpan0/1
// - hwsim0
// - teql0
// - eql
char netdevsim[16];
sprintf(netdevsim, "netdevsim%d", (int)procid);
struct {
const char* type;
const char* dev;
} devtypes[] = {
// Note: ip6erspan device can't be added if ip6gretap exists in the same namespace.
{"ip6gretap", "ip6gretap0"},
{"bridge", "bridge0"},
{"vcan", "vcan0"},
{"bond", "bond0"},
{"team", "team0"},
{"dummy", "dummy0"},
{"nlmon", "nlmon0"},
{"caif", "caif0"},
{"batadv", "batadv0"},
// Note: this adds vxcan0/vxcan1 pair, similar to veth (creating vxcan0 would fail).
{"vxcan", "vxcan1"},
// Note: netdevsim devices can't have the same name even in different namespaces.
{"netdevsim", netdevsim},
// This adds connected veth0 and veth1 devices.
{"veth", 0},
{"xfrm", "xfrm0"},
};
const char* devmasters[] = {"bridge", "bond", "team"};
// If you extend this array, also update netdev_addr_id in vnet.txt.
struct {
const char* name;
int macsize;
bool noipv6;
} devices[] = {
{"lo", ETH_ALEN},
{"sit0", 0},
{"bridge0", ETH_ALEN},
{"vcan0", 0, true},
{"tunl0", 0},
{"gre0", 0},
{"gretap0", ETH_ALEN},
{"ip_vti0", 0},
{"ip6_vti0", 0},
{"ip6tnl0", 0},
{"ip6gre0", 0},
{"ip6gretap0", ETH_ALEN},
{"erspan0", ETH_ALEN},
{"bond0", ETH_ALEN},
{"veth0", ETH_ALEN},
{"veth1", ETH_ALEN},
{"team0", ETH_ALEN},
{"veth0_to_bridge", ETH_ALEN},
{"veth1_to_bridge", ETH_ALEN},
{"veth0_to_bond", ETH_ALEN},
{"veth1_to_bond", ETH_ALEN},
{"veth0_to_team", ETH_ALEN},
{"veth1_to_team", ETH_ALEN},
{"veth0_to_hsr", ETH_ALEN},
{"veth1_to_hsr", ETH_ALEN},
{"hsr0", 0},
{"dummy0", ETH_ALEN},
{"nlmon0", 0},
{"vxcan0", 0, true},
{"vxcan1", 0, true},
{"caif0", ETH_ALEN}, // TODO: up'ing caif fails with ENODEV
{"batadv0", ETH_ALEN},
{netdevsim, ETH_ALEN},
{"xfrm0", ETH_ALEN},
{"veth0_virt_wifi", ETH_ALEN},
{"veth1_virt_wifi", ETH_ALEN},
{"virt_wifi0", ETH_ALEN},
};
int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
if (sock == -1)
fail("socket(AF_NETLINK) failed");
unsigned i;
for (i = 0; i < sizeof(devtypes) / sizeof(devtypes[0]); i++)
netlink_add_device(&nlmsg, sock, devtypes[i].type, devtypes[i].dev);
// This creates connected bridge/bond/team_slave devices of type veth,
// and makes them slaves of bridge/bond/team devices, respectively.
// Note: slave devices don't need MAC/IP addresses, only master devices.
// veth0_to_* is not slave devices, which still need ip addresses.
for (i = 0; i < sizeof(devmasters) / (sizeof(devmasters[0])); i++) {
char master[32], slave0[32], veth0[32], slave1[32], veth1[32];
sprintf(slave0, "%s_slave_0", devmasters[i]);
sprintf(veth0, "veth0_to_%s", devmasters[i]);
netlink_add_veth(&nlmsg, sock, slave0, veth0);
sprintf(slave1, "%s_slave_1", devmasters[i]);
sprintf(veth1, "veth1_to_%s", devmasters[i]);
netlink_add_veth(&nlmsg, sock, slave1, veth1);
sprintf(master, "%s0", devmasters[i]);
netlink_device_change(&nlmsg, sock, slave0, false, master, 0, 0, NULL);
netlink_device_change(&nlmsg, sock, slave1, false, master, 0, 0, NULL);
}
// bond/team_slave_* will set up automatically when set their master.
// But bridge_slave_* need to set up manually.
netlink_device_change(&nlmsg, sock, "bridge_slave_0", true, 0, 0, 0, NULL);
netlink_device_change(&nlmsg, sock, "bridge_slave_1", true, 0, 0, 0, NULL);
// Setup hsr device (slightly different from what we do for devmasters).
netlink_add_veth(&nlmsg, sock, "hsr_slave_0", "veth0_to_hsr");
netlink_add_veth(&nlmsg, sock, "hsr_slave_1", "veth1_to_hsr");
netlink_add_hsr(&nlmsg, sock, "hsr0", "hsr_slave_0", "hsr_slave_1");
netlink_device_change(&nlmsg, sock, "hsr_slave_0", true, 0, 0, 0, NULL);
netlink_device_change(&nlmsg, sock, "hsr_slave_1", true, 0, 0, 0, NULL);
netlink_add_veth(&nlmsg, sock, "veth0_virt_wifi", "veth1_virt_wifi");
netlink_add_virt_wifi(&nlmsg, sock, "virt_wifi0", "veth1_virt_wifi");
netdevsim_add((int)procid, 4); // Number of port is in sync with value in sys/linux/socket_netlink_generic_devlink.txt
for (i = 0; i < sizeof(devices) / (sizeof(devices[0])); i++) {
// Assign some unique address to devices. Some devices won't up without this.
// Shift addresses by 10 because 0 subnet address can mean special things.
char addr[32];
sprintf(addr, DEV_IPV4, i + 10);
netlink_add_addr4(&nlmsg, sock, devices[i].name, addr);
if (!devices[i].noipv6) {
sprintf(addr, DEV_IPV6, i + 10);
netlink_add_addr6(&nlmsg, sock, devices[i].name, addr);
}
uint64 macaddr = DEV_MAC + ((i + 10ull) << 40);
netlink_device_change(&nlmsg, sock, devices[i].name, true, 0, &macaddr, devices[i].macsize, NULL);
}
close(sock);
}
// Same as initialize_netdevices, but called in init net namespace.
static void initialize_netdevices_init(void)
{
#if SYZ_EXECUTOR
if (!flag_net_devices)
return;
#endif
int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
if (sock == -1)
fail("socket(AF_NETLINK) failed");
struct {
const char* type;
int macsize;
bool noipv6;
bool noup;
} devtypes[] = {
// NETROM device, see net/netrom/{af_netrom,nr_dev}.c
{"nr", 7, true},
// ROSE device, see net/rose/{af_rose,rose_dev}.c
// We don't up it yet because it crashes kernel right away:
// https://groups.google.com/d/msg/syzkaller/v-4B3zoBC-4/02SCKEzJBwAJ
{"rose", 5, true, true},
};
unsigned i;
for (i = 0; i < sizeof(devtypes) / sizeof(devtypes[0]); i++) {
char dev[32], addr[32];
sprintf(dev, "%s%d", devtypes[i].type, (int)procid);
// Note: syscall descriptions know these addresses.
sprintf(addr, "172.30.%d.%d", i, (int)procid + 1);
netlink_add_addr4(&nlmsg, sock, dev, addr);
if (!devtypes[i].noipv6) {
sprintf(addr, "fe88::%02x:%02x", i, (int)procid + 1);
netlink_add_addr6(&nlmsg, sock, dev, addr);
}
int macsize = devtypes[i].macsize;
uint64 macaddr = 0xbbbbbb + ((unsigned long long)i << (8 * (macsize - 2))) +
(procid << (8 * (macsize - 1)));
netlink_device_change(&nlmsg, sock, dev, !devtypes[i].noup, 0, &macaddr, macsize, NULL);
}
close(sock);
}
#endif
#if SYZ_EXECUTOR || SYZ_NET_INJECTION && (__NR_syz_extract_tcp_res || SYZ_REPEAT)
#include <errno.h>
static int read_tun(char* data, int size)
{
if (tunfd < 0)
return -1;
int rv = read(tunfd, data, size);
if (rv < 0) {
if (errno == EAGAIN)
return -1;
// Tun sometimes returns this, unclear if it's a kernel bug or not.
if (errno == EBADFD)
return -1;
fail("tun: read failed with %d", rv);
}
return rv;
}
#endif
#if SYZ_EXECUTOR || __NR_syz_emit_ethernet && SYZ_NET_INJECTION
#include <stdbool.h>
#include <sys/uio.h>
#define MAX_FRAGS 4
struct vnet_fragmentation {
uint32 full;
uint32 count;
uint32 frags[MAX_FRAGS];
};
static long syz_emit_ethernet(volatile long a0, volatile long a1, volatile long a2)
{
// syz_emit_ethernet(len len[packet], packet ptr[in, eth_packet], frags ptr[in, vnet_fragmentation, opt])
// vnet_fragmentation {
// full int32[0:1]
// count int32[1:4]
// frags array[int32[0:4096], 4]
// }
if (tunfd < 0)
return (uintptr_t)-1;
uint32 length = a0;
char* data = (char*)a1;
debug_dump_data(data, length);
struct vnet_fragmentation* frags = (struct vnet_fragmentation*)a2;
struct iovec vecs[MAX_FRAGS + 1];
uint32 nfrags = 0;
if (!tun_frags_enabled || frags == NULL) {
vecs[nfrags].iov_base = data;
vecs[nfrags].iov_len = length;
nfrags++;
} else {
bool full = true;
uint32 i, count = 0;
NONFAILING(full = frags->full);
NONFAILING(count = frags->count);
if (count > MAX_FRAGS)
count = MAX_FRAGS;
for (i = 0; i < count && length != 0; i++) {
uint32 size = 0;
NONFAILING(size = frags->frags[i]);
if (size > length)
size = length;
vecs[nfrags].iov_base = data;
vecs[nfrags].iov_len = size;
nfrags++;
data += size;
length -= size;
}
if (length != 0 && (full || nfrags == 0)) {
vecs[nfrags].iov_base = data;
vecs[nfrags].iov_len = length;
nfrags++;
}
}
return writev(tunfd, vecs, nfrags);
}
#endif
#if SYZ_EXECUTOR || SYZ_REPEAT && SYZ_NET_INJECTION
static void flush_tun()
{
#if SYZ_EXECUTOR
if (!flag_net_injection)
return;
#endif
char data[1000];
while (read_tun(&data[0], sizeof(data)) != -1) {
}
}
#endif
#if SYZ_EXECUTOR || __NR_syz_extract_tcp_res && SYZ_NET_INJECTION
#ifndef __ANDROID__
// Can't include <linux/ipv6.h>, since it causes
// conflicts due to some structs redefinition.
struct ipv6hdr {
__u8 priority : 4,
version : 4;
__u8 flow_lbl[3];
__be16 payload_len;
__u8 nexthdr;
__u8 hop_limit;
struct in6_addr saddr;
struct in6_addr daddr;
};
#endif
struct tcp_resources {
uint32 seq;
uint32 ack;
};
static long syz_extract_tcp_res(volatile long a0, volatile long a1, volatile long a2)
{
// syz_extract_tcp_res(res ptr[out, tcp_resources], seq_inc int32, ack_inc int32)
if (tunfd < 0)
return (uintptr_t)-1;
// We just need this to be large enough to hold headers that we parse (ethernet/ip/tcp).
// Rest of the packet (if any) will be silently truncated which is fine.
char data[1000];
int rv = read_tun(&data[0], sizeof(data));
if (rv == -1)
return (uintptr_t)-1;
size_t length = rv;
debug_dump_data(data, length);
struct tcphdr* tcphdr;
if (length < sizeof(struct ethhdr))
return (uintptr_t)-1;
struct ethhdr* ethhdr = (struct ethhdr*)&data[0];
if (ethhdr->h_proto == htons(ETH_P_IP)) {
if (length < sizeof(struct ethhdr) + sizeof(struct iphdr))
return (uintptr_t)-1;
struct iphdr* iphdr = (struct iphdr*)&data[sizeof(struct ethhdr)];
if (iphdr->protocol != IPPROTO_TCP)
return (uintptr_t)-1;
if (length < sizeof(struct ethhdr) + iphdr->ihl * 4 + sizeof(struct tcphdr))
return (uintptr_t)-1;
tcphdr = (struct tcphdr*)&data[sizeof(struct ethhdr) + iphdr->ihl * 4];
} else {
if (length < sizeof(struct ethhdr) + sizeof(struct ipv6hdr))
return (uintptr_t)-1;
struct ipv6hdr* ipv6hdr = (struct ipv6hdr*)&data[sizeof(struct ethhdr)];
// TODO: parse and skip extension headers.
if (ipv6hdr->nexthdr != IPPROTO_TCP)
return (uintptr_t)-1;
if (length < sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
return (uintptr_t)-1;
tcphdr = (struct tcphdr*)&data[sizeof(struct ethhdr) + sizeof(struct ipv6hdr)];
}
struct tcp_resources* res = (struct tcp_resources*)a0;
NONFAILING(res->seq = htonl((ntohl(tcphdr->seq) + (uint32)a1)));
NONFAILING(res->ack = htonl((ntohl(tcphdr->ack_seq) + (uint32)a2)));
debug("extracted seq: %08x\n", res->seq);
debug("extracted ack: %08x\n", res->ack);
return 0;
}
#endif
#if SYZ_EXECUTOR || SYZ_CLOSE_FDS || __NR_syz_usb_connect
#define MAX_FDS 30
#endif
#if SYZ_EXECUTOR || __NR_syz_usb_connect
#include <errno.h>
#include <fcntl.h>
#include <linux/usb/ch9.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/types.h>
#include "common_usb.h"
#endif
#if SYZ_EXECUTOR || __NR_syz_open_dev
#include <fcntl.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
static long syz_open_dev(volatile long a0, volatile long a1, volatile long a2)
{
if (a0 == 0xc || a0 == 0xb) {
// syz_open_dev$char(dev const[0xc], major intptr, minor intptr) fd
// syz_open_dev$block(dev const[0xb], major intptr, minor intptr) fd
char buf[128];
sprintf(buf, "/dev/%s/%d:%d", a0 == 0xc ? "char" : "block", (uint8)a1, (uint8)a2);
return open(buf, O_RDWR, 0);
} else {
// syz_open_dev(dev strconst, id intptr, flags flags[open_flags]) fd
char buf[1024];
char* hash;
NONFAILING(strncpy(buf, (char*)a0, sizeof(buf) - 1));
buf[sizeof(buf) - 1] = 0;
while ((hash = strchr(buf, '#'))) {
*hash = '0' + (char)(a1 % 10); // 10 devices should be enough for everyone.
a1 /= 10;
}
return open(buf, a2, 0);
}
}
#endif
#if SYZ_EXECUTOR || __NR_syz_open_procfs
#include <fcntl.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
static long syz_open_procfs(volatile long a0, volatile long a1)
{
// syz_open_procfs(pid pid, file ptr[in, string[procfs_file]]) fd
char buf[128];
memset(buf, 0, sizeof(buf));
if (a0 == 0) {
NONFAILING(snprintf(buf, sizeof(buf), "/proc/self/%s", (char*)a1));
} else if (a0 == -1) {
NONFAILING(snprintf(buf, sizeof(buf), "/proc/thread-self/%s", (char*)a1));
} else {
NONFAILING(snprintf(buf, sizeof(buf), "/proc/self/task/%d/%s", (int)a0, (char*)a1));
}
int fd = open(buf, O_RDWR);
if (fd == -1)
fd = open(buf, O_RDONLY);
return fd;
}
#endif
#if SYZ_EXECUTOR || __NR_syz_open_pts
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/types.h>
static long syz_open_pts(volatile long a0, volatile long a1)
{
// syz_openpts(fd fd[tty], flags flags[open_flags]) fd[tty]
int ptyno = 0;
if (ioctl(a0, TIOCGPTN, &ptyno))
return -1;
char buf[128];
sprintf(buf, "/dev/pts/%d", ptyno);
return open(buf, a1, 0);
}
#endif
#if SYZ_EXECUTOR || __NR_syz_init_net_socket
#if SYZ_EXECUTOR || SYZ_SANDBOX_NONE || SYZ_SANDBOX_SETUID || SYZ_SANDBOX_NAMESPACE || SYZ_SANDBOX_ANDROID
#include <fcntl.h>
#include <sched.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
// syz_init_net_socket opens a socket in init net namespace.
// Used for families that can only be created in init net namespace.
static long syz_init_net_socket(volatile long domain, volatile long type, volatile long proto)
{
int netns = open("/proc/self/ns/net", O_RDONLY);
if (netns == -1)
return netns;
if (setns(kInitNetNsFd, 0))
return -1;
int sock = syscall(__NR_socket, domain, type, proto);
int err = errno;
if (setns(netns, 0))
fail("setns(netns) failed");
close(netns);
errno = err;
return sock;
}
#else
static long syz_init_net_socket(volatile long domain, volatile long type, volatile long proto)
{
return syscall(__NR_socket, domain, type, proto);
}
#endif
#endif
#if SYZ_EXECUTOR || __NR_syz_genetlink_get_family_id
#include <errno.h>
#include <linux/genetlink.h>
#include <linux/netlink.h>
#include <sys/socket.h>
#include <sys/types.h>
static long syz_genetlink_get_family_id(volatile long name)
{
char buf[512] = {0};
struct nlmsghdr* hdr = (struct nlmsghdr*)buf;
struct genlmsghdr* genlhdr = (struct genlmsghdr*)NLMSG_DATA(hdr);
struct nlattr* attr = (struct nlattr*)(genlhdr + 1);
hdr->nlmsg_len = sizeof(*hdr) + sizeof(*genlhdr) + sizeof(*attr) + GENL_NAMSIZ;
hdr->nlmsg_type = GENL_ID_CTRL;
hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
genlhdr->cmd = CTRL_CMD_GETFAMILY;
attr->nla_type = CTRL_ATTR_FAMILY_NAME;
attr->nla_len = sizeof(*attr) + GENL_NAMSIZ;
NONFAILING(strncpy((char*)(attr + 1), (char*)name, GENL_NAMSIZ));
struct iovec iov = {hdr, hdr->nlmsg_len};
struct sockaddr_nl addr = {0};
addr.nl_family = AF_NETLINK;
debug("syz_genetlink_get_family_id(%s)\n", (char*)(attr + 1));
int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
if (fd == -1) {
debug("syz_genetlink_get_family_id: socket failed: %d\n", errno);
return -1;
}
struct msghdr msg = {&addr, sizeof(addr), &iov, 1, NULL, 0, 0};
if (sendmsg(fd, &msg, 0) == -1) {
debug("syz_genetlink_get_family_id: sendmsg failed: %d\n", errno);
close(fd);
return -1;
}
ssize_t n = recv(fd, buf, sizeof(buf), 0);
close(fd);
if (n <= 0) {
debug("syz_genetlink_get_family_id: recv failed: %d\n", errno);
return -1;
}
if (hdr->nlmsg_type != GENL_ID_CTRL) {
debug("syz_genetlink_get_family_id: wrong reply type: %d\n", hdr->nlmsg_type);
return -1;
}
for (; (char*)attr < buf + n; attr = (struct nlattr*)((char*)attr + NLMSG_ALIGN(attr->nla_len))) {
if (attr->nla_type == CTRL_ATTR_FAMILY_ID)
return *(uint16*)(attr + 1);
}
debug("syz_genetlink_get_family_id: no CTRL_ATTR_FAMILY_ID attr\n");
return -1;
}
#endif
#if SYZ_EXECUTOR || __NR_syz_mount_image || __NR_syz_read_part_table
#include <errno.h>
#include <fcntl.h>
#include <linux/loop.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/types.h>
struct fs_image_segment {
void* data;
uintptr_t size;
uintptr_t offset;
};
#define IMAGE_MAX_SEGMENTS 4096
#define IMAGE_MAX_SIZE (129 << 20)
#if GOARCH_386
#define sys_memfd_create 356
#elif GOARCH_amd64
#define sys_memfd_create 319
#elif GOARCH_arm
#define sys_memfd_create 385
#elif GOARCH_arm64
#define sys_memfd_create 279
#elif GOARCH_ppc64le
#define sys_memfd_create 360
#elif GOARCH_mips64le
#define sys_memfd_create 314
#endif
static unsigned long fs_image_segment_check(unsigned long size, unsigned long nsegs, long segments)
{
unsigned long i;
// Strictly saying we ought to do a nonfailing copyout of segments into a local var.
// But some filesystems have large number of segments (2000+),
// we can't allocate that much on stack and allocating elsewhere is problematic,
// so we just use the memory allocated by fuzzer.
struct fs_image_segment* segs = (struct fs_image_segment*)segments;
if (nsegs > IMAGE_MAX_SEGMENTS)
nsegs = IMAGE_MAX_SEGMENTS;
for (i = 0; i < nsegs; i++) {
if (segs[i].size > IMAGE_MAX_SIZE)
segs[i].size = IMAGE_MAX_SIZE;
segs[i].offset %= IMAGE_MAX_SIZE;
if (segs[i].offset > IMAGE_MAX_SIZE - segs[i].size)
segs[i].offset = IMAGE_MAX_SIZE - segs[i].size;
if (size < segs[i].offset + segs[i].offset)
size = segs[i].offset + segs[i].offset;
}
if (size > IMAGE_MAX_SIZE)
size = IMAGE_MAX_SIZE;
return size;
}
#endif
#if SYZ_EXECUTOR || __NR_syz_read_part_table
// syz_read_part_table(size intptr, nsegs len[segments], segments ptr[in, array[fs_image_segment]])
static long syz_read_part_table(volatile unsigned long size, volatile unsigned long nsegs, volatile long segments)
{
char loopname[64], linkname[64];
int loopfd, err = 0, res = -1;
unsigned long i, j;
NONFAILING(size = fs_image_segment_check(size, nsegs, segments));
int memfd = syscall(sys_memfd_create, "syz_read_part_table", 0);
if (memfd == -1) {
err = errno;
goto error;
}
if (ftruncate(memfd, size)) {
err = errno;
goto error_close_memfd;
}
for (i = 0; i < nsegs; i++) {
struct fs_image_segment* segs = (struct fs_image_segment*)segments;
int res1 = 0;
NONFAILING(res1 = pwrite(memfd, segs[i].data, segs[i].size, segs[i].offset));
if (res1 < 0) {
debug("syz_read_part_table: pwrite[%u] failed: %d\n", (int)i, errno);
}
}
snprintf(loopname, sizeof(loopname), "/dev/loop%llu", procid);
loopfd = open(loopname, O_RDWR);
if (loopfd == -1) {
err = errno;
goto error_close_memfd;
}
if (ioctl(loopfd, LOOP_SET_FD, memfd)) {
if (errno != EBUSY) {
err = errno;
goto error_close_loop;
}
ioctl(loopfd, LOOP_CLR_FD, 0);
usleep(1000);
if (ioctl(loopfd, LOOP_SET_FD, memfd)) {
err = errno;
goto error_close_loop;
}
}
struct loop_info64 info;
if (ioctl(loopfd, LOOP_GET_STATUS64, &info)) {
err = errno;
goto error_clear_loop;
}
#if SYZ_EXECUTOR
cover_reset(0);
#endif
info.lo_flags |= LO_FLAGS_PARTSCAN;
if (ioctl(loopfd, LOOP_SET_STATUS64, &info)) {
err = errno;
goto error_clear_loop;
}
res = 0;
// If we managed to parse some partitions, symlink them into our work dir.
for (i = 1, j = 0; i < 8; i++) {
snprintf(loopname, sizeof(loopname), "/dev/loop%llup%d", procid, (int)i);
struct stat statbuf;
if (stat(loopname, &statbuf) == 0) {
snprintf(linkname, sizeof(linkname), "./file%d", (int)j++);
if (symlink(loopname, linkname)) {
debug("syz_read_part_table: symlink(%s, %s) failed: %d\n", loopname, linkname, errno);
}
}
}
error_clear_loop:
ioctl(loopfd, LOOP_CLR_FD, 0);
error_close_loop:
close(loopfd);
error_close_memfd:
close(memfd);
error:
errno = err;
return res;
}
#endif
#if SYZ_EXECUTOR || __NR_syz_mount_image
#include <string.h>
#include <sys/mount.h>
//syz_mount_image(fs ptr[in, string[disk_filesystems]], dir ptr[in, filename], size intptr, nsegs len[segments], segments ptr[in, array[fs_image_segment]], flags flags[mount_flags], opts ptr[in, fs_options[vfat_options]])
//fs_image_segment {
// data ptr[in, array[int8]]
// size len[data, intptr]
// offset intptr
//}
static long syz_mount_image(volatile long fsarg, volatile long dir, volatile unsigned long size, volatile unsigned long nsegs, volatile long segments, volatile long flags, volatile long optsarg)
{
char loopname[64], fs[32], opts[256];
int loopfd, err = 0, res = -1;
unsigned long i;
NONFAILING(size = fs_image_segment_check(size, nsegs, segments));
int memfd = syscall(sys_memfd_create, "syz_mount_image", 0);
if (memfd == -1) {
err = errno;
goto error;
}
if (ftruncate(memfd, size)) {
err = errno;
goto error_close_memfd;
}
for (i = 0; i < nsegs; i++) {
struct fs_image_segment* segs = (struct fs_image_segment*)segments;
int res1 = 0;
NONFAILING(res1 = pwrite(memfd, segs[i].data, segs[i].size, segs[i].offset));
if (res1 < 0) {
debug("syz_mount_image: pwrite[%u] failed: %d\n", (int)i, errno);
}
}
snprintf(loopname, sizeof(loopname), "/dev/loop%llu", procid);
loopfd = open(loopname, O_RDWR);
if (loopfd == -1) {
err = errno;
goto error_close_memfd;
}
if (ioctl(loopfd, LOOP_SET_FD, memfd)) {
if (errno != EBUSY) {
err = errno;
goto error_close_loop;
}
ioctl(loopfd, LOOP_CLR_FD, 0);
usleep(1000);
if (ioctl(loopfd, LOOP_SET_FD, memfd)) {
err = errno;
goto error_close_loop;
}
}
mkdir((char*)dir, 0777);
memset(fs, 0, sizeof(fs));
NONFAILING(strncpy(fs, (char*)fsarg, sizeof(fs) - 1));
memset(opts, 0, sizeof(opts));
// Leave some space for the additional options we append below.
NONFAILING(strncpy(opts, (char*)optsarg, sizeof(opts) - 32));
if (strcmp(fs, "iso9660") == 0) {
flags |= MS_RDONLY;
} else if (strncmp(fs, "ext", 3) == 0) {
// For ext2/3/4 we have to have errors=continue because the image
// can contain errors=panic flag and can legally crash kernel.
if (strstr(opts, "errors=panic") || strstr(opts, "errors=remount-ro") == 0)
strcat(opts, ",errors=continue");
} else if (strcmp(fs, "xfs") == 0) {
// For xfs we need nouuid because xfs has a global uuids table
// and if two parallel executors mounts fs with the same uuid, second mount fails.
strcat(opts, ",nouuid");
}
debug("syz_mount_image: size=%llu segs=%llu loop='%s' dir='%s' fs='%s' flags=%llu opts='%s'\n", (uint64)size, (uint64)nsegs, loopname, (char*)dir, fs, (uint64)flags, opts);
#if SYZ_EXECUTOR
cover_reset(0);
#endif
if (mount(loopname, (char*)dir, fs, flags, opts)) {
err = errno;
goto error_clear_loop;
}
res = 0;
error_clear_loop:
ioctl(loopfd, LOOP_CLR_FD, 0);
error_close_loop:
close(loopfd);
error_close_memfd:
close(memfd);
error:
errno = err;
return res;
}
#endif
#if SYZ_EXECUTOR || __NR_syz_kvm_setup_cpu
#include <errno.h>
#include <fcntl.h>
#include <linux/kvm.h>
#include <stdarg.h>
#include <stddef.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#if GOARCH_amd64
#include "common_kvm_amd64.h"
#elif GOARCH_arm64
#include "common_kvm_arm64.h"
#else
static long syz_kvm_setup_cpu(volatile long a0, volatile long a1, volatile long a2, volatile long a3, volatile long a4, volatile long a5, volatile long a6, volatile long a7)
{
return 0;
}
#endif
#endif
#if SYZ_EXECUTOR || SYZ_NET_RESET
#include <errno.h>
#include <net/if.h>
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>
#include <linux/net.h>
// checkpoint/reset_net_namespace partially resets net namespace to initial state
// after each test. Currently it resets only ipv4 netfilter state.
// Ideally, we just create a new net namespace for each test,
// however it's too slow (1-1.5 seconds per namespace, not parallelizable).
// Linux headers do not compile for C++, so we have to define the structs manualy.
#define XT_TABLE_SIZE 1536
#define XT_MAX_ENTRIES 10
struct xt_counters {
uint64 pcnt, bcnt;
};
struct ipt_getinfo {
char name[32];
unsigned int valid_hooks;
unsigned int hook_entry[5];
unsigned int underflow[5];
unsigned int num_entries;
unsigned int size;
};
struct ipt_get_entries {
char name[32];
unsigned int size;
void* entrytable[XT_TABLE_SIZE / sizeof(void*)];
};
struct ipt_replace {
char name[32];
unsigned int valid_hooks;
unsigned int num_entries;
unsigned int size;
unsigned int hook_entry[5];
unsigned int underflow[5];
unsigned int num_counters;
struct xt_counters* counters;
char entrytable[XT_TABLE_SIZE];
};
struct ipt_table_desc {
const char* name;
struct ipt_getinfo info;
struct ipt_replace replace;
};
static struct ipt_table_desc ipv4_tables[] = {
{.name = "filter"},
{.name = "nat"},
{.name = "mangle"},
{.name = "raw"},
{.name = "security"},
};
static struct ipt_table_desc ipv6_tables[] = {
{.name = "filter"},
{.name = "nat"},
{.name = "mangle"},
{.name = "raw"},
{.name = "security"},
};
#define IPT_BASE_CTL 64
#define IPT_SO_SET_REPLACE (IPT_BASE_CTL)
#define IPT_SO_GET_INFO (IPT_BASE_CTL)
#define IPT_SO_GET_ENTRIES (IPT_BASE_CTL + 1)
struct arpt_getinfo {
char name[32];
unsigned int valid_hooks;
unsigned int hook_entry[3];
unsigned int underflow[3];
unsigned int num_entries;
unsigned int size;
};
struct arpt_get_entries {
char name[32];
unsigned int size;
void* entrytable[XT_TABLE_SIZE / sizeof(void*)];
};
struct arpt_replace {
char name[32];
unsigned int valid_hooks;
unsigned int num_entries;
unsigned int size;
unsigned int hook_entry[3];
unsigned int underflow[3];
unsigned int num_counters;
struct xt_counters* counters;
char entrytable[XT_TABLE_SIZE];
};
struct arpt_table_desc {
const char* name;
struct arpt_getinfo info;
struct arpt_replace replace;
};
static struct arpt_table_desc arpt_tables[] = {
{.name = "filter"},
};
#define ARPT_BASE_CTL 96
#define ARPT_SO_SET_REPLACE (ARPT_BASE_CTL)
#define ARPT_SO_GET_INFO (ARPT_BASE_CTL)
#define ARPT_SO_GET_ENTRIES (ARPT_BASE_CTL + 1)
static void checkpoint_iptables(struct ipt_table_desc* tables, int num_tables, int family, int level)
{
struct ipt_get_entries entries;
socklen_t optlen;
int fd, i;
fd = socket(family, SOCK_STREAM, IPPROTO_TCP);
if (fd == -1) {
switch (errno) {
case EAFNOSUPPORT:
case ENOPROTOOPT:
return;
}
fail("iptable checkpoint %d: socket failed", family);
}
for (i = 0; i < num_tables; i++) {
struct ipt_table_desc* table = &tables[i];
strcpy(table->info.name, table->name);
strcpy(table->replace.name, table->name);
optlen = sizeof(table->info);
if (getsockopt(fd, level, IPT_SO_GET_INFO, &table->info, &optlen)) {
switch (errno) {
case EPERM:
case ENOENT:
case ENOPROTOOPT:
continue;
}
fail("iptable checkpoint %s/%d: getsockopt(IPT_SO_GET_INFO)", table->name, family);
}
debug("iptable checkpoint %s/%d: checkpoint entries=%d hooks=%x size=%d\n",
table->name, family, table->info.num_entries,
table->info.valid_hooks, table->info.size);
if (table->info.size > sizeof(table->replace.entrytable))
fail("iptable checkpoint %s/%d: table size is too large: %u",
table->name, family, table->info.size);
if (table->info.num_entries > XT_MAX_ENTRIES)
fail("iptable checkpoint %s/%d: too many counters: %u",
table->name, family, table->info.num_entries);
memset(&entries, 0, sizeof(entries));
strcpy(entries.name, table->name);
entries.size = table->info.size;
optlen = sizeof(entries) - sizeof(entries.entrytable) + table->info.size;
if (getsockopt(fd, level, IPT_SO_GET_ENTRIES, &entries, &optlen))
fail("iptable checkpoint %s/%d: getsockopt(IPT_SO_GET_ENTRIES)",
table->name, family);
table->replace.valid_hooks = table->info.valid_hooks;
table->replace.num_entries = table->info.num_entries;
table->replace.size = table->info.size;
memcpy(table->replace.hook_entry, table->info.hook_entry, sizeof(table->replace.hook_entry));
memcpy(table->replace.underflow, table->info.underflow, sizeof(table->replace.underflow));
memcpy(table->replace.entrytable, entries.entrytable, table->info.size);
}
close(fd);
}
static void reset_iptables(struct ipt_table_desc* tables, int num_tables, int family, int level)
{
struct xt_counters counters[XT_MAX_ENTRIES];
struct ipt_get_entries entries;
struct ipt_getinfo info;
socklen_t optlen;
int fd, i;
fd = socket(family, SOCK_STREAM, IPPROTO_TCP);
if (fd == -1) {
switch (errno) {
case EAFNOSUPPORT:
case ENOPROTOOPT:
return;
}
fail("iptable %d: socket failed", family);
}
for (i = 0; i < num_tables; i++) {
struct ipt_table_desc* table = &tables[i];
if (table->info.valid_hooks == 0)
continue;
memset(&info, 0, sizeof(info));
strcpy(info.name, table->name);
optlen = sizeof(info);
if (getsockopt(fd, level, IPT_SO_GET_INFO, &info, &optlen))
fail("iptable %s/%d: getsockopt(IPT_SO_GET_INFO)", table->name, family);
if (memcmp(&table->info, &info, sizeof(table->info)) == 0) {
memset(&entries, 0, sizeof(entries));
strcpy(entries.name, table->name);
entries.size = table->info.size;
optlen = sizeof(entries) - sizeof(entries.entrytable) + entries.size;
if (getsockopt(fd, level, IPT_SO_GET_ENTRIES, &entries, &optlen))
fail("iptable %s/%d: getsockopt(IPT_SO_GET_ENTRIES)", table->name, family);
if (memcmp(table->replace.entrytable, entries.entrytable, table->info.size) == 0)
continue;
}
debug("iptable %s/%d: resetting\n", table->name, family);
table->replace.num_counters = info.num_entries;
table->replace.counters = counters;
optlen = sizeof(table->replace) - sizeof(table->replace.entrytable) + table->replace.size;
if (setsockopt(fd, level, IPT_SO_SET_REPLACE, &table->replace, optlen))
fail("iptable %s/%d: setsockopt(IPT_SO_SET_REPLACE)", table->name, family);
}
close(fd);
}
static void checkpoint_arptables(void)
{
struct arpt_get_entries entries;
socklen_t optlen;
unsigned i;
int fd;
fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
if (fd == -1) {
switch (errno) {
case EAFNOSUPPORT:
case ENOPROTOOPT:
return;
}
fail("arptable checkpoint: socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)");
}
for (i = 0; i < sizeof(arpt_tables) / sizeof(arpt_tables[0]); i++) {
struct arpt_table_desc* table = &arpt_tables[i];
strcpy(table->info.name, table->name);
strcpy(table->replace.name, table->name);
optlen = sizeof(table->info);
if (getsockopt(fd, SOL_IP, ARPT_SO_GET_INFO, &table->info, &optlen)) {
switch (errno) {
case EPERM:
case ENOENT:
case ENOPROTOOPT:
continue;
}
fail("arptable checkpoint %s: getsockopt(ARPT_SO_GET_INFO)", table->name);
}
debug("arptable checkpoint %s: entries=%d hooks=%x size=%d\n",
table->name, table->info.num_entries, table->info.valid_hooks, table->info.size);
if (table->info.size > sizeof(table->replace.entrytable))
fail("arptable checkpoint %s: table size is too large: %u",
table->name, table->info.size);
if (table->info.num_entries > XT_MAX_ENTRIES)
fail("arptable checkpoint %s: too many counters: %u",
table->name, table->info.num_entries);
memset(&entries, 0, sizeof(entries));
strcpy(entries.name, table->name);
entries.size = table->info.size;
optlen = sizeof(entries) - sizeof(entries.entrytable) + table->info.size;
if (getsockopt(fd, SOL_IP, ARPT_SO_GET_ENTRIES, &entries, &optlen))
fail("arptable checkpoint %s: getsockopt(ARPT_SO_GET_ENTRIES)", table->name);
table->replace.valid_hooks = table->info.valid_hooks;
table->replace.num_entries = table->info.num_entries;
table->replace.size = table->info.size;
memcpy(table->replace.hook_entry, table->info.hook_entry, sizeof(table->replace.hook_entry));
memcpy(table->replace.underflow, table->info.underflow, sizeof(table->replace.underflow));
memcpy(table->replace.entrytable, entries.entrytable, table->info.size);
}
close(fd);
}
static void reset_arptables()
{
struct xt_counters counters[XT_MAX_ENTRIES];
struct arpt_get_entries entries;
struct arpt_getinfo info;
socklen_t optlen;
unsigned i;
int fd;
fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
if (fd == -1) {
switch (errno) {
case EAFNOSUPPORT:
case ENOPROTOOPT:
return;
}
fail("arptable: socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)");
}
for (i = 0; i < sizeof(arpt_tables) / sizeof(arpt_tables[0]); i++) {
struct arpt_table_desc* table = &arpt_tables[i];
if (table->info.valid_hooks == 0)
continue;
memset(&info, 0, sizeof(info));
strcpy(info.name, table->name);
optlen = sizeof(info);
if (getsockopt(fd, SOL_IP, ARPT_SO_GET_INFO, &info, &optlen))
fail("arptable %s:getsockopt(ARPT_SO_GET_INFO)", table->name);
if (memcmp(&table->info, &info, sizeof(table->info)) == 0) {
memset(&entries, 0, sizeof(entries));
strcpy(entries.name, table->name);
entries.size = table->info.size;
optlen = sizeof(entries) - sizeof(entries.entrytable) + entries.size;
if (getsockopt(fd, SOL_IP, ARPT_SO_GET_ENTRIES, &entries, &optlen))
fail("arptable %s: getsockopt(ARPT_SO_GET_ENTRIES)", table->name);
if (memcmp(table->replace.entrytable, entries.entrytable, table->info.size) == 0)
continue;
debug("arptable %s: data changed\n", table->name);
} else {
debug("arptable %s: header changed\n", table->name);
}
debug("arptable %s: resetting\n", table->name);
table->replace.num_counters = info.num_entries;
table->replace.counters = counters;
optlen = sizeof(table->replace) - sizeof(table->replace.entrytable) + table->replace.size;
if (setsockopt(fd, SOL_IP, ARPT_SO_SET_REPLACE, &table->replace, optlen))
fail("arptable %s: setsockopt(ARPT_SO_SET_REPLACE)", table->name);
}
close(fd);
}
// ebtables.h is broken too:
// ebtables.h: In function ebt_entry_target* ebt_get_target(ebt_entry*):
// ebtables.h:197:19: error: invalid conversion from void* to ebt_entry_target*
#define NF_BR_NUMHOOKS 6
#define EBT_TABLE_MAXNAMELEN 32
#define EBT_CHAIN_MAXNAMELEN 32
#define EBT_BASE_CTL 128
#define EBT_SO_SET_ENTRIES (EBT_BASE_CTL)
#define EBT_SO_GET_INFO (EBT_BASE_CTL)
#define EBT_SO_GET_ENTRIES (EBT_SO_GET_INFO + 1)
#define EBT_SO_GET_INIT_INFO (EBT_SO_GET_ENTRIES + 1)
#define EBT_SO_GET_INIT_ENTRIES (EBT_SO_GET_INIT_INFO + 1)
struct ebt_replace {
char name[EBT_TABLE_MAXNAMELEN];
unsigned int valid_hooks;
unsigned int nentries;
unsigned int entries_size;
struct ebt_entries* hook_entry[NF_BR_NUMHOOKS];
unsigned int num_counters;
struct ebt_counter* counters;
char* entries;
};
struct ebt_entries {
unsigned int distinguisher;
char name[EBT_CHAIN_MAXNAMELEN];
unsigned int counter_offset;
int policy;
unsigned int nentries;
char data[0] __attribute__((aligned(__alignof__(struct ebt_replace))));
};
struct ebt_table_desc {
const char* name;
struct ebt_replace replace;
char entrytable[XT_TABLE_SIZE];
};
static struct ebt_table_desc ebt_tables[] = {
{.name = "filter"},
{.name = "nat"},
{.name = "broute"},
};
static void checkpoint_ebtables(void)
{
socklen_t optlen;
unsigned i;
int fd;
fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
if (fd == -1) {
switch (errno) {
case EAFNOSUPPORT:
case ENOPROTOOPT:
return;
}
fail("ebtable checkpoint: socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)");
}
for (i = 0; i < sizeof(ebt_tables) / sizeof(ebt_tables[0]); i++) {
struct ebt_table_desc* table = &ebt_tables[i];
strcpy(table->replace.name, table->name);
optlen = sizeof(table->replace);
if (getsockopt(fd, SOL_IP, EBT_SO_GET_INIT_INFO, &table->replace, &optlen)) {
switch (errno) {
case EPERM:
case ENOENT:
case ENOPROTOOPT:
continue;
}
fail("ebtable checkpoint %s: getsockopt(EBT_SO_GET_INIT_INFO)", table->name);
}
debug("ebtable checkpoint %s: entries=%d hooks=%x size=%d\n",
table->name, table->replace.nentries, table->replace.valid_hooks,
table->replace.entries_size);
if (table->replace.entries_size > sizeof(table->entrytable))
fail("ebtable checkpoint %s: table size is too large: %u",
table->name, table->replace.entries_size);
table->replace.num_counters = 0;
table->replace.entries = table->entrytable;
optlen = sizeof(table->replace) + table->replace.entries_size;
if (getsockopt(fd, SOL_IP, EBT_SO_GET_INIT_ENTRIES, &table->replace, &optlen))
fail("ebtable checkpoint %s: getsockopt(EBT_SO_GET_INIT_ENTRIES)", table->name);
}
close(fd);
}
static void reset_ebtables()
{
struct ebt_replace replace;
char entrytable[XT_TABLE_SIZE];
socklen_t optlen;
unsigned i, j, h;
int fd;
fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
if (fd == -1) {
switch (errno) {
case EAFNOSUPPORT:
case ENOPROTOOPT:
return;
}
fail("ebtable: socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)");
}
for (i = 0; i < sizeof(ebt_tables) / sizeof(ebt_tables[0]); i++) {
struct ebt_table_desc* table = &ebt_tables[i];
if (table->replace.valid_hooks == 0)
continue;
memset(&replace, 0, sizeof(replace));
strcpy(replace.name, table->name);
optlen = sizeof(replace);
if (getsockopt(fd, SOL_IP, EBT_SO_GET_INFO, &replace, &optlen))
fail("ebtable %s: getsockopt(EBT_SO_GET_INFO)", table->name);
replace.num_counters = 0;
table->replace.entries = 0;
for (h = 0; h < NF_BR_NUMHOOKS; h++)
table->replace.hook_entry[h] = 0;
if (memcmp(&table->replace, &replace, sizeof(table->replace)) == 0) {
memset(&entrytable, 0, sizeof(entrytable));
replace.entries = entrytable;
optlen = sizeof(replace) + replace.entries_size;
if (getsockopt(fd, SOL_IP, EBT_SO_GET_ENTRIES, &replace, &optlen))
fail("ebtable %s: getsockopt(EBT_SO_GET_ENTRIES)", table->name);
if (memcmp(table->entrytable, entrytable, replace.entries_size) == 0)
continue;
}
debug("ebtable %s: resetting\n", table->name);
// Kernel does not seem to return actual entry points (wat?).
for (j = 0, h = 0; h < NF_BR_NUMHOOKS; h++) {
if (table->replace.valid_hooks & (1 << h)) {
table->replace.hook_entry[h] = (struct ebt_entries*)table->entrytable + j;
j++;
}
}
table->replace.entries = table->entrytable;
optlen = sizeof(table->replace) + table->replace.entries_size;
if (setsockopt(fd, SOL_IP, EBT_SO_SET_ENTRIES, &table->replace, optlen))
fail("ebtable %s: setsockopt(EBT_SO_SET_ENTRIES)", table->name);
}
close(fd);
}
static void checkpoint_net_namespace(void)
{
#if SYZ_EXECUTOR
if (!flag_net_reset || flag_sandbox_setuid)
return;
#endif
checkpoint_ebtables();
checkpoint_arptables();
checkpoint_iptables(ipv4_tables, sizeof(ipv4_tables) / sizeof(ipv4_tables[0]), AF_INET, SOL_IP);
checkpoint_iptables(ipv6_tables, sizeof(ipv6_tables) / sizeof(ipv6_tables[0]), AF_INET6, SOL_IPV6);
}
static void reset_net_namespace(void)
{
#if SYZ_EXECUTOR
if (!flag_net_reset || flag_sandbox_setuid)
return;
#endif
reset_ebtables();
reset_arptables();
reset_iptables(ipv4_tables, sizeof(ipv4_tables) / sizeof(ipv4_tables[0]), AF_INET, SOL_IP);
reset_iptables(ipv6_tables, sizeof(ipv6_tables) / sizeof(ipv6_tables[0]), AF_INET6, SOL_IPV6);
}
#endif
#if SYZ_EXECUTOR || (SYZ_CGROUPS && (SYZ_SANDBOX_NONE || SYZ_SANDBOX_SETUID || SYZ_SANDBOX_NAMESPACE || SYZ_SANDBOX_ANDROID))
#include <fcntl.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/types.h>
static void setup_cgroups()
{
#if SYZ_EXECUTOR
if (!flag_cgroups)
return;
#endif
if (mkdir("/syzcgroup", 0777)) {
debug("mkdir(/syzcgroup) failed: %d\n", errno);
}
if (mkdir("/syzcgroup/unified", 0777)) {
debug("mkdir(/syzcgroup/unified) failed: %d\n", errno);
}
if (mount("none", "/syzcgroup/unified", "cgroup2", 0, NULL)) {
debug("mount(cgroup2) failed: %d\n", errno);
}
if (chmod("/syzcgroup/unified", 0777)) {
debug("chmod(/syzcgroup/unified) failed: %d\n", errno);
}
write_file("/syzcgroup/unified/cgroup.subtree_control", "+cpu +memory +io +pids +rdma");
if (mkdir("/syzcgroup/cpu", 0777)) {
debug("mkdir(/syzcgroup/cpu) failed: %d\n", errno);
}
if (mount("none", "/syzcgroup/cpu", "cgroup", 0, "cpuset,cpuacct,perf_event,hugetlb")) {
debug("mount(cgroup cpu) failed: %d\n", errno);
}
write_file("/syzcgroup/cpu/cgroup.clone_children", "1");
if (chmod("/syzcgroup/cpu", 0777)) {
debug("chmod(/syzcgroup/cpu) failed: %d\n", errno);
}
if (mkdir("/syzcgroup/net", 0777)) {
debug("mkdir(/syzcgroup/net) failed: %d\n", errno);
}
if (mount("none", "/syzcgroup/net", "cgroup", 0, "net_cls,net_prio,devices,freezer")) {
debug("mount(cgroup net) failed: %d\n", errno);
}
if (chmod("/syzcgroup/net", 0777)) {
debug("chmod(/syzcgroup/net) failed: %d\n", errno);
}
}
#if SYZ_EXECUTOR || SYZ_REPEAT
static void setup_cgroups_loop()
{
#if SYZ_EXECUTOR
if (!flag_cgroups)
return;
#endif
int pid = getpid();
char file[128];
char cgroupdir[64];
snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/unified/syz%llu", procid);
if (mkdir(cgroupdir, 0777)) {
debug("mkdir(%s) failed: %d\n", cgroupdir, errno);
}
// Restrict number of pids per test process to prevent fork bombs.
// We have up to 16 threads + main process + loop.
// 32 pids should be enough for everyone.
snprintf(file, sizeof(file), "%s/pids.max", cgroupdir);
write_file(file, "32");
// Restrict memory consumption.
// We have some syscalls that inherently consume lots of memory,
// e.g. mounting some filesystem images requires at least 128MB
// image in memory. We restrict RLIMIT_AS to 200MB. Here we gradually
// increase low/high/max limits to make things more interesting.
// Also this takes into account KASAN quarantine size.
// If the limit is lower than KASAN quarantine size, then it can happen
// so that we kill the process, but all of its memory is in quarantine
// and is still accounted against memcg. As the result memcg won't
// allow to allocate any memory in the parent and in the new test process.
// The current limit of 300MB supports up to 9.6GB RAM (quarantine is 1/32).
snprintf(file, sizeof(file), "%s/memory.low", cgroupdir);
write_file(file, "%d", 298 << 20);
snprintf(file, sizeof(file), "%s/memory.high", cgroupdir);
write_file(file, "%d", 299 << 20);
snprintf(file, sizeof(file), "%s/memory.max", cgroupdir);
write_file(file, "%d", 300 << 20);
// Setup some v1 groups to make things more interesting.
snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir);
write_file(file, "%d", pid);
snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/cpu/syz%llu", procid);
if (mkdir(cgroupdir, 0777)) {
debug("mkdir(%s) failed: %d\n", cgroupdir, errno);
}
snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir);
write_file(file, "%d", pid);
snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/net/syz%llu", procid);
if (mkdir(cgroupdir, 0777)) {
debug("mkdir(%s) failed: %d\n", cgroupdir, errno);
}
snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir);
write_file(file, "%d", pid);
}
static void setup_cgroups_test()
{
#if SYZ_EXECUTOR
if (!flag_cgroups)
return;
#endif
char cgroupdir[64];
snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/unified/syz%llu", procid);
if (symlink(cgroupdir, "./cgroup")) {
debug("symlink(%s, ./cgroup) failed: %d\n", cgroupdir, errno);
}
snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/cpu/syz%llu", procid);
if (symlink(cgroupdir, "./cgroup.cpu")) {
debug("symlink(%s, ./cgroup.cpu) failed: %d\n", cgroupdir, errno);
}
snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/net/syz%llu", procid);
if (symlink(cgroupdir, "./cgroup.net")) {
debug("symlink(%s, ./cgroup.net) failed: %d\n", cgroupdir, errno);
}
}
#endif
#if SYZ_EXECUTOR || SYZ_SANDBOX_NAMESPACE
void initialize_cgroups()
{
#if SYZ_EXECUTOR
if (!flag_cgroups)
return;
#endif
if (mkdir("./syz-tmp/newroot/syzcgroup", 0700))
fail("mkdir failed");
if (mkdir("./syz-tmp/newroot/syzcgroup/unified", 0700))
fail("mkdir failed");
if (mkdir("./syz-tmp/newroot/syzcgroup/cpu", 0700))
fail("mkdir failed");
if (mkdir("./syz-tmp/newroot/syzcgroup/net", 0700))
fail("mkdir failed");
unsigned bind_mount_flags = MS_BIND | MS_REC | MS_PRIVATE;
if (mount("/syzcgroup/unified", "./syz-tmp/newroot/syzcgroup/unified", NULL, bind_mount_flags, NULL)) {
debug("mount(cgroup2, MS_BIND) failed: %d\n", errno);
}
if (mount("/syzcgroup/cpu", "./syz-tmp/newroot/syzcgroup/cpu", NULL, bind_mount_flags, NULL)) {
debug("mount(cgroup/cpu, MS_BIND) failed: %d\n", errno);
}
if (mount("/syzcgroup/net", "./syz-tmp/newroot/syzcgroup/net", NULL, bind_mount_flags, NULL)) {
debug("mount(cgroup/net, MS_BIND) failed: %d\n", errno);
}
}
#endif
#endif
#if SYZ_EXECUTOR || SYZ_SANDBOX_NONE || SYZ_SANDBOX_SETUID || SYZ_SANDBOX_NAMESPACE || SYZ_SANDBOX_ANDROID
#include <errno.h>
#include <sys/mount.h>
static void setup_common()
{
if (mount(0, "/sys/fs/fuse/connections", "fusectl", 0, 0)) {
debug("mount(fusectl) failed: %d\n", errno);
}
#if SYZ_EXECUTOR || SYZ_CGROUPS
setup_cgroups();
#endif
}
#include <sched.h>
#include <sys/prctl.h>
#include <sys/resource.h>
#include <sys/time.h>
#include <sys/wait.h>
static void loop();
static void sandbox_common()
{
prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
setpgrp();
setsid();
#if SYZ_EXECUTOR || __NR_syz_init_net_socket || SYZ_DEVLINK_PCI
int netns = open("/proc/self/ns/net", O_RDONLY);
if (netns == -1)
fail("open(/proc/self/ns/net) failed");
if (dup2(netns, kInitNetNsFd) < 0)
fail("dup2(netns, kInitNetNsFd) failed");
close(netns);
#endif
struct rlimit rlim;
#if SYZ_EXECUTOR
rlim.rlim_cur = rlim.rlim_max = (200 << 20) +
(kMaxThreads * kCoverSize + kExtraCoverSize) * sizeof(void*);
#else
rlim.rlim_cur = rlim.rlim_max = (200 << 20);
#endif
setrlimit(RLIMIT_AS, &rlim);
rlim.rlim_cur = rlim.rlim_max = 32 << 20;
setrlimit(RLIMIT_MEMLOCK, &rlim);
rlim.rlim_cur = rlim.rlim_max = 136 << 20;
setrlimit(RLIMIT_FSIZE, &rlim);
rlim.rlim_cur = rlim.rlim_max = 1 << 20;
setrlimit(RLIMIT_STACK, &rlim);
rlim.rlim_cur = rlim.rlim_max = 0;
setrlimit(RLIMIT_CORE, &rlim);
rlim.rlim_cur = rlim.rlim_max = 256; // see kMaxFd
setrlimit(RLIMIT_NOFILE, &rlim);
// CLONE_NEWNS/NEWCGROUP cause EINVAL on some systems,
// so we do them separately of clone in do_sandbox_namespace.
if (unshare(CLONE_NEWNS)) {
debug("unshare(CLONE_NEWNS): %d\n", errno);
}
if (unshare(CLONE_NEWIPC)) {
debug("unshare(CLONE_NEWIPC): %d\n", errno);
}
if (unshare(0x02000000)) {
debug("unshare(CLONE_NEWCGROUP): %d\n", errno);
}
if (unshare(CLONE_NEWUTS)) {
debug("unshare(CLONE_NEWUTS): %d\n", errno);
}
if (unshare(CLONE_SYSVSEM)) {
debug("unshare(CLONE_SYSVSEM): %d\n", errno);
}
// These sysctl's restrict ipc resource usage (by default it's possible
// to eat all system memory by creating e.g. lots of large sem sets).
// These sysctl's are per-namespace, so we need to set them inside
// of the test ipc namespace (after CLONE_NEWIPC).
typedef struct {
const char* name;
const char* value;
} sysctl_t;
static const sysctl_t sysctls[] = {
{"/proc/sys/kernel/shmmax", "16777216"},
{"/proc/sys/kernel/shmall", "536870912"},
{"/proc/sys/kernel/shmmni", "1024"},
{"/proc/sys/kernel/msgmax", "8192"},
{"/proc/sys/kernel/msgmni", "1024"},
{"/proc/sys/kernel/msgmnb", "1024"},
{"/proc/sys/kernel/sem", "1024 1048576 500 1024"},
};
unsigned i;
for (i = 0; i < sizeof(sysctls) / sizeof(sysctls[0]); i++)
write_file(sysctls[i].name, sysctls[i].value);
}
int wait_for_loop(int pid)
{
if (pid < 0)
fail("sandbox fork failed");
debug("spawned loop pid %d\n", pid);
int status = 0;
while (waitpid(-1, &status, __WALL) != pid) {
}
return WEXITSTATUS(status);
}
#endif
#if SYZ_EXECUTOR || SYZ_SANDBOX_NONE || SYZ_SANDBOX_NAMESPACE
#include <linux/capability.h>
static void drop_caps(void)
{
struct __user_cap_header_struct cap_hdr = {};
struct __user_cap_data_struct cap_data[2] = {};
cap_hdr.version = _LINUX_CAPABILITY_VERSION_3;
cap_hdr.pid = getpid();
if (syscall(SYS_capget, &cap_hdr, &cap_data))
fail("capget failed");
// Drop CAP_SYS_PTRACE so that test processes can't attach to parent processes.
// Previously it lead to hangs because the loop process stopped due to SIGSTOP.
// Note that a process can always ptrace its direct children, which is enough for testing purposes.
//
// A process with CAP_SYS_NICE can bring kernel down by asking for too high SCHED_DEADLINE priority,
// as the result rcu and other system services that use kernel threads will stop functioning.
// Some parameters for SCHED_DEADLINE should be OK, but we don't have means to enforce
// values of indirect syscall arguments. Peter Zijlstra proposed sysctl_deadline_period_{min,max}
// which could be used to enfore safe limits without droppping CAP_SYS_NICE, but we don't have it yet.
// See the following bug for details:
// https://groups.google.com/forum/#!topic/syzkaller-bugs/G6Wl_PKPIWI
const int drop = (1 << CAP_SYS_PTRACE) | (1 << CAP_SYS_NICE);
cap_data[0].effective &= ~drop;
cap_data[0].permitted &= ~drop;
cap_data[0].inheritable &= ~drop;
if (syscall(SYS_capset, &cap_hdr, &cap_data))
fail("capset failed");
}
#endif
#if SYZ_EXECUTOR || SYZ_SANDBOX_NONE
#include <sched.h>
#include <sys/types.h>
static int do_sandbox_none(void)
{
// CLONE_NEWPID takes effect for the first child of the current process,
// so we do it before fork to make the loop "init" process of the namespace.
// We ought to do fail here, but sandbox=none is used in pkg/ipc tests
// and they are usually run under non-root.
// Also since debug is stripped by pkg/csource, we need to do {}
// even though we generally don't do {} around single statements.
if (unshare(CLONE_NEWPID)) {
debug("unshare(CLONE_NEWPID): %d\n", errno);
}
int pid = fork();
if (pid != 0)
return wait_for_loop(pid);
setup_common();
sandbox_common();
drop_caps();
#if SYZ_EXECUTOR || SYZ_NET_DEVICES
initialize_netdevices_init();
#endif
if (unshare(CLONE_NEWNET)) {
debug("unshare(CLONE_NEWNET): %d\n", errno);
}
#if SYZ_EXECUTOR || SYZ_DEVLINK_PCI
initialize_devlink_pci();
#endif
#if SYZ_EXECUTOR || SYZ_NET_INJECTION
initialize_tun();
#endif
#if SYZ_EXECUTOR || SYZ_NET_DEVICES
initialize_netdevices();
#endif
loop();
doexit(1);
}
#endif
#if SYZ_EXECUTOR || SYZ_SANDBOX_SETUID
#include <grp.h>
#include <sched.h>
#include <sys/prctl.h>
#define SYZ_HAVE_SANDBOX_SETUID 1
static int do_sandbox_setuid(void)
{
if (unshare(CLONE_NEWPID)) {
debug("unshare(CLONE_NEWPID): %d\n", errno);
}
int pid = fork();
if (pid != 0)
return wait_for_loop(pid);
setup_common();
sandbox_common();
#if SYZ_EXECUTOR || SYZ_NET_DEVICES
initialize_netdevices_init();
#endif
if (unshare(CLONE_NEWNET)) {
debug("unshare(CLONE_NEWNET): %d\n", errno);
}
#if SYZ_EXECUTOR || SYZ_DEVLINK_PCI
initialize_devlink_pci();
#endif
#if SYZ_EXECUTOR || SYZ_NET_INJECTION
initialize_tun();
#endif
#if SYZ_EXECUTOR || SYZ_NET_DEVICES
initialize_netdevices();
#endif
const int nobody = 65534;
if (setgroups(0, NULL))
fail("failed to setgroups");
if (syscall(SYS_setresgid, nobody, nobody, nobody))
fail("failed to setresgid");
if (syscall(SYS_setresuid, nobody, nobody, nobody))
fail("failed to setresuid");
// This is required to open /proc/self/* files.
// Otherwise they are owned by root and we can't open them after setuid.
// See task_dump_owner function in kernel.
prctl(PR_SET_DUMPABLE, 1, 0, 0, 0);
loop();
doexit(1);
}
#endif
#if SYZ_EXECUTOR || SYZ_SANDBOX_NAMESPACE
#include <sched.h>
#include <sys/mman.h>
#include <sys/mount.h>
static int real_uid;
static int real_gid;
__attribute__((aligned(64 << 10))) static char sandbox_stack[1 << 20];
static int namespace_sandbox_proc(void* arg)
{
sandbox_common();
// /proc/self/setgroups is not present on some systems, ignore error.
write_file("/proc/self/setgroups", "deny");
if (!write_file("/proc/self/uid_map", "0 %d 1\n", real_uid))
fail("write of /proc/self/uid_map failed");
if (!write_file("/proc/self/gid_map", "0 %d 1\n", real_gid))
fail("write of /proc/self/gid_map failed");
#if SYZ_EXECUTOR || SYZ_NET_DEVICES
initialize_netdevices_init();
#endif
// CLONE_NEWNET must always happen before tun setup,
// because we want the tun device in the test namespace.
if (unshare(CLONE_NEWNET))
fail("unshare(CLONE_NEWNET)");
#if SYZ_EXECUTOR || SYZ_DEVLINK_PCI
initialize_devlink_pci();
#endif
#if SYZ_EXECUTOR || SYZ_NET_INJECTION
// We setup tun here as it needs to be in the test net namespace,
// which in turn needs to be in the test user namespace.
// However, IFF_NAPI_FRAGS will fail as we are not root already.
// TODO: we should create tun in the init net namespace and use setns
// to move it to the target namespace.
initialize_tun();
#endif
#if SYZ_EXECUTOR || SYZ_NET_DEVICES
initialize_netdevices();
#endif
if (mkdir("./syz-tmp", 0777))
fail("mkdir(syz-tmp) failed");
if (mount("", "./syz-tmp", "tmpfs", 0, NULL))
fail("mount(tmpfs) failed");
if (mkdir("./syz-tmp/newroot", 0777))
fail("mkdir failed");
if (mkdir("./syz-tmp/newroot/dev", 0700))
fail("mkdir failed");
unsigned bind_mount_flags = MS_BIND | MS_REC | MS_PRIVATE;
if (mount("/dev", "./syz-tmp/newroot/dev", NULL, bind_mount_flags, NULL))
fail("mount(dev) failed");
if (mkdir("./syz-tmp/newroot/proc", 0700))
fail("mkdir failed");
if (mount(NULL, "./syz-tmp/newroot/proc", "proc", 0, NULL))
fail("mount(proc) failed");
if (mkdir("./syz-tmp/newroot/selinux", 0700))
fail("mkdir failed");
// selinux mount used to be at /selinux, but then moved to /sys/fs/selinux.
const char* selinux_path = "./syz-tmp/newroot/selinux";
if (mount("/selinux", selinux_path, NULL, bind_mount_flags, NULL)) {
if (errno != ENOENT)
fail("mount(/selinux) failed");
if (mount("/sys/fs/selinux", selinux_path, NULL, bind_mount_flags, NULL) && errno != ENOENT)
fail("mount(/sys/fs/selinux) failed");
}
if (mkdir("./syz-tmp/newroot/sys", 0700))
fail("mkdir failed");
if (mount("/sys", "./syz-tmp/newroot/sys", 0, bind_mount_flags, NULL))
fail("mount(sysfs) failed");
#if SYZ_EXECUTOR || SYZ_CGROUPS
initialize_cgroups();
#endif
if (mkdir("./syz-tmp/pivot", 0777))
fail("mkdir failed");
if (syscall(SYS_pivot_root, "./syz-tmp", "./syz-tmp/pivot")) {
debug("pivot_root failed\n");
if (chdir("./syz-tmp"))
fail("chdir failed");
} else {
debug("pivot_root OK\n");
if (chdir("/"))
fail("chdir failed");
if (umount2("./pivot", MNT_DETACH))
fail("umount failed");
}
if (chroot("./newroot"))
fail("chroot failed");
if (chdir("/"))
fail("chdir failed");
drop_caps();
loop();
doexit(1);
}
#define SYZ_HAVE_SANDBOX_NAMESPACE 1
static int do_sandbox_namespace(void)
{
int pid;
setup_common();
real_uid = getuid();
real_gid = getgid();
mprotect(sandbox_stack, 4096, PROT_NONE); // to catch stack underflows
pid = clone(namespace_sandbox_proc, &sandbox_stack[sizeof(sandbox_stack) - 64],
CLONE_NEWUSER | CLONE_NEWPID, 0);
return wait_for_loop(pid);
}
#endif
#if SYZ_EXECUTOR || SYZ_SANDBOX_ANDROID
#include <fcntl.h> // open(2)
#include <grp.h> // setgroups
#include <sys/xattr.h> // setxattr, getxattr
#define AID_NET_BT_ADMIN 3001
#define AID_NET_BT 3002
#define AID_INET 3003
#define AID_EVERYBODY 9997
#define AID_APP 10000
#define UNTRUSTED_APP_UID AID_APP + 999
#define UNTRUSTED_APP_GID AID_APP + 999
const char* SELINUX_CONTEXT_UNTRUSTED_APP = "u:r:untrusted_app:s0:c512,c768";
const char* SELINUX_LABEL_APP_DATA_FILE = "u:object_r:app_data_file:s0:c512,c768";
const char* SELINUX_CONTEXT_FILE = "/proc/thread-self/attr/current";
const char* SELINUX_XATTR_NAME = "security.selinux";
const gid_t UNTRUSTED_APP_GROUPS[] = {UNTRUSTED_APP_GID, AID_NET_BT_ADMIN, AID_NET_BT, AID_INET, AID_EVERYBODY};
const size_t UNTRUSTED_APP_NUM_GROUPS = sizeof(UNTRUSTED_APP_GROUPS) / sizeof(UNTRUSTED_APP_GROUPS[0]);
// Similar to libselinux getcon(3), but:
// - No library dependency
// - No dynamic memory allocation
// - Uses fail() instead of returning an error code
static void syz_getcon(char* context, size_t context_size)
{
int fd = open(SELINUX_CONTEXT_FILE, O_RDONLY);
if (fd < 0)
fail("getcon: Couldn't open %s", SELINUX_CONTEXT_FILE);
ssize_t nread = read(fd, context, context_size);
close(fd);
if (nread <= 0)
fail("getcon: Failed to read from %s", SELINUX_CONTEXT_FILE);
// The contents of the context file MAY end with a newline
// and MAY not have a null terminator. Handle this here.
if (context[nread - 1] == '\n')
context[nread - 1] = '\0';
}
// Similar to libselinux setcon(3), but:
// - No library dependency
// - No dynamic memory allocation
// - Uses fail() instead of returning an error code
static void syz_setcon(const char* context)
{
char new_context[512];
// Attempt to write the new context
int fd = open(SELINUX_CONTEXT_FILE, O_WRONLY);
if (fd < 0)
fail("setcon: Could not open %s", SELINUX_CONTEXT_FILE);
ssize_t bytes_written = write(fd, context, strlen(context));
// N.B.: We cannot reuse this file descriptor, since the target SELinux context
// may not be able to read from it.
close(fd);
if (bytes_written != (ssize_t)strlen(context))
fail("setcon: Could not write entire context. Wrote %zi, expected %zu", bytes_written, strlen(context));
// Validate the transition by checking the context
syz_getcon(new_context, sizeof(new_context));
if (strcmp(context, new_context) != 0)
fail("setcon: Failed to change to %s, context is %s", context, new_context);
}
// Similar to libselinux getfilecon(3), but:
// - No library dependency
// - No dynamic memory allocation
// - Uses fail() instead of returning an error code
static int syz_getfilecon(const char* path, char* context, size_t context_size)
{
int length = getxattr(path, SELINUX_XATTR_NAME, context, context_size);
if (length == -1)
fail("getfilecon: getxattr failed");
return length;
}
// Similar to libselinux setfilecon(3), but:
// - No library dependency
// - No dynamic memory allocation
// - Uses fail() instead of returning an error code
static void syz_setfilecon(const char* path, const char* context)
{
char new_context[512];
if (setxattr(path, SELINUX_XATTR_NAME, context, strlen(context) + 1, 0) != 0)
fail("setfilecon: setxattr failed");
if (syz_getfilecon(path, new_context, sizeof(new_context)) <= 0)
fail("setfilecon: getfilecon failed");
if (strcmp(context, new_context) != 0)
fail("setfilecon: could not set context to %s, currently %s", context, new_context);
}
#define SYZ_HAVE_SANDBOX_ANDROID 1
static int do_sandbox_android(void)
{
setup_common();
sandbox_common();
if (chown(".", UNTRUSTED_APP_UID, UNTRUSTED_APP_UID) != 0)
fail("chmod failed");
if (setgroups(UNTRUSTED_APP_NUM_GROUPS, UNTRUSTED_APP_GROUPS) != 0)
fail("setgroups failed");
if (setresgid(UNTRUSTED_APP_GID, UNTRUSTED_APP_GID, UNTRUSTED_APP_GID) != 0)
fail("setresgid failed");
if (setresuid(UNTRUSTED_APP_UID, UNTRUSTED_APP_UID, UNTRUSTED_APP_UID) != 0)
fail("setresuid failed");
syz_setfilecon(".", SELINUX_LABEL_APP_DATA_FILE);
syz_setcon(SELINUX_CONTEXT_UNTRUSTED_APP);
#if SYZ_EXECUTOR || SYZ_NET_INJECTION
initialize_tun();
#endif
#if SYZ_EXECUTOR || SYZ_NET_DEVICES
// Note: sandbox_android does not unshare net namespace.
initialize_netdevices_init();
initialize_netdevices();
#endif
loop();
doexit(1);
}
#endif
#if SYZ_EXECUTOR || SYZ_REPEAT && SYZ_USE_TMP_DIR
#include <dirent.h>
#include <errno.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mount.h>
#define FS_IOC_SETFLAGS _IOW('f', 2, long)
// One does not simply remove a directory.
// There can be mounts, so we need to try to umount.
// Moreover, a mount can be mounted several times, so we need to try to umount in a loop.
// Moreover, after umount a dir can become non-empty again, so we need another loop.
// Moreover, a mount can be re-mounted as read-only and then we will fail to make a dir empty.
static void remove_dir(const char* dir)
{
DIR* dp;
struct dirent* ep;
int iter = 0;
retry:
while (umount2(dir, MNT_DETACH) == 0) {
debug("umount(%s)\n", dir);
}
dp = opendir(dir);
if (dp == NULL) {
if (errno == EMFILE) {
// This happens when the test process casts prlimit(NOFILE) on us.
// Ideally we somehow prevent test processes from messing with parent processes.
// But full sandboxing is expensive, so let's ignore this error for now.
exitf("opendir(%s) failed due to NOFILE, exiting", dir);
}
exitf("opendir(%s) failed", dir);
}
while ((ep = readdir(dp))) {
if (strcmp(ep->d_name, ".") == 0 || strcmp(ep->d_name, "..") == 0)
continue;
char filename[FILENAME_MAX];
snprintf(filename, sizeof(filename), "%s/%s", dir, ep->d_name);
// If it's 9p mount with broken transport, lstat will fail.
// So try to umount first.
while (umount2(filename, MNT_DETACH) == 0) {
debug("umount(%s)\n", filename);
}
struct stat st;
if (lstat(filename, &st))
exitf("lstat(%s) failed", filename);
if (S_ISDIR(st.st_mode)) {
remove_dir(filename);
continue;
}
int i;
for (i = 0;; i++) {
if (unlink(filename) == 0)
break;
if (errno == EPERM) {
// Try to reset FS_XFLAG_IMMUTABLE.
int fd = open(filename, O_RDONLY);
if (fd != -1) {
long flags = 0;
if (ioctl(fd, FS_IOC_SETFLAGS, &flags) == 0) {
debug("reset FS_XFLAG_IMMUTABLE\n");
}
close(fd);
continue;
}
}
if (errno == EROFS) {
debug("ignoring EROFS\n");
break;
}
if (errno != EBUSY || i > 100)
exitf("unlink(%s) failed", filename);
debug("umount(%s)\n", filename);
if (umount2(filename, MNT_DETACH))
exitf("umount(%s) failed", filename);
}
}
closedir(dp);
int i;
for (i = 0;; i++) {
if (rmdir(dir) == 0)
break;
if (i < 100) {
if (errno == EPERM) {
// Try to reset FS_XFLAG_IMMUTABLE.
int fd = open(dir, O_RDONLY);
if (fd != -1) {
long flags = 0;
if (ioctl(fd, FS_IOC_SETFLAGS, &flags) == 0) {
debug("reset FS_XFLAG_IMMUTABLE\n");
}
close(fd);
continue;
}
}
if (errno == EROFS) {
debug("ignoring EROFS\n");
break;
}
if (errno == EBUSY) {
debug("umount(%s)\n", dir);
if (umount2(dir, MNT_DETACH))
exitf("umount(%s) failed", dir);
continue;
}
if (errno == ENOTEMPTY) {
if (iter < 100) {
iter++;
goto retry;
}
}
}
exitf("rmdir(%s) failed", dir);
}
}
#endif
#if SYZ_EXECUTOR || SYZ_FAULT
#include <fcntl.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
static int inject_fault(int nth)
{
int fd;
fd = open("/proc/thread-self/fail-nth", O_RDWR);
// We treat errors here as temporal/non-critical because we see
// occasional ENOENT/EACCES errors returned. It seems that fuzzer
// somehow gets its hands to it.
if (fd == -1)
exitf("failed to open /proc/thread-self/fail-nth");
char buf[16];
sprintf(buf, "%d", nth + 1);
if (write(fd, buf, strlen(buf)) != (ssize_t)strlen(buf))
exitf("failed to write /proc/thread-self/fail-nth");
return fd;
}
#endif
#if SYZ_EXECUTOR
static int fault_injected(int fail_fd)
{
char buf[16];
int n = read(fail_fd, buf, sizeof(buf) - 1);
if (n <= 0)
exitf("failed to read /proc/thread-self/fail-nth");
int res = n == 2 && buf[0] == '0' && buf[1] == '\n';
buf[0] = '0';
if (write(fail_fd, buf, 1) != 1)
exitf("failed to write /proc/thread-self/fail-nth");
close(fail_fd);
return res;
}
#endif
#if SYZ_EXECUTOR || SYZ_REPEAT
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
static void kill_and_wait(int pid, int* status)
{
kill(-pid, SIGKILL);
kill(pid, SIGKILL);
int i;
// First, give it up to 100 ms to surrender.
for (i = 0; i < 100; i++) {
if (waitpid(-1, status, WNOHANG | __WALL) == pid)
return;
usleep(1000);
}
// Now, try to abort fuse connections as they cause deadlocks,
// see Documentation/filesystems/fuse.txt for details.
// There is no good way to figure out the right connections
// provided that the process could use unshare(CLONE_NEWNS),
// so we abort all.
debug("kill is not working\n");
DIR* dir = opendir("/sys/fs/fuse/connections");
if (dir) {
for (;;) {
struct dirent* ent = readdir(dir);
if (!ent)
break;
if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0)
continue;
char abort[300];
snprintf(abort, sizeof(abort), "/sys/fs/fuse/connections/%s/abort", ent->d_name);
int fd = open(abort, O_WRONLY);
if (fd == -1) {
debug("failed to open %s: %d\n", abort, errno);
continue;
}
debug("aborting fuse conn %s\n", ent->d_name);
if (write(fd, abort, 1) < 0) {
debug("failed to abort: %d\n", errno);
}
close(fd);
}
closedir(dir);
} else {
debug("failed to open /sys/fs/fuse/connections: %d\n", errno);
}
// Now, just wait, no other options.
while (waitpid(-1, status, __WALL) != pid) {
}
}
#endif
#if SYZ_EXECUTOR || SYZ_REPEAT && (SYZ_CGROUPS || SYZ_NET_RESET)
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#define SYZ_HAVE_SETUP_LOOP 1
static void setup_loop()
{
#if SYZ_EXECUTOR || SYZ_CGROUPS
setup_cgroups_loop();
#endif
#if SYZ_EXECUTOR || SYZ_NET_RESET
checkpoint_net_namespace();
#endif
}
#endif
#if SYZ_EXECUTOR || SYZ_REPEAT && (SYZ_NET_RESET || __NR_syz_mount_image || __NR_syz_read_part_table)
#define SYZ_HAVE_RESET_LOOP 1
static void reset_loop()
{
#if SYZ_EXECUTOR || __NR_syz_mount_image || __NR_syz_read_part_table
char buf[64];
snprintf(buf, sizeof(buf), "/dev/loop%llu", procid);
int loopfd = open(buf, O_RDWR);
if (loopfd != -1) {
ioctl(loopfd, LOOP_CLR_FD, 0);
close(loopfd);
}
#endif
#if SYZ_EXECUTOR || SYZ_NET_RESET
reset_net_namespace();
#endif
}
#endif
#if SYZ_EXECUTOR || SYZ_REPEAT
#include <sys/prctl.h>
#define SYZ_HAVE_SETUP_TEST 1
static void setup_test()
{
prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
setpgrp();
#if SYZ_EXECUTOR || SYZ_CGROUPS
setup_cgroups_test();
#endif
// It's the leaf test process we want to be always killed first.
write_file("/proc/self/oom_score_adj", "1000");
#if SYZ_EXECUTOR || SYZ_NET_INJECTION
// Read all remaining packets from tun to better
// isolate consequently executing programs.
flush_tun();
#endif
}
#endif
#if SYZ_EXECUTOR || SYZ_CLOSE_FDS
#define SYZ_HAVE_CLOSE_FDS 1
static void close_fds()
{
#if SYZ_EXECUTOR
if (!flag_close_fds)
return;
#endif
// Keeping a 9p transport pipe open will hang the proccess dead,
// so close all opened file descriptors.
// Also close all USB emulation descriptors to trigger exit from USB
// event loop to collect coverage.
int fd;
for (fd = 3; fd < MAX_FDS; fd++)
close(fd);
}
#endif
#if SYZ_EXECUTOR || SYZ_FAULT
#include <errno.h>
static void setup_fault()
{
static struct {
const char* file;
const char* val;
bool fatal;
} files[] = {
{"/sys/kernel/debug/failslab/ignore-gfp-wait", "N", true},
// These are enabled by separate configs (e.g. CONFIG_FAIL_FUTEX)
// and we did not check all of them in host.checkFaultInjection, so we ignore errors.
{"/sys/kernel/debug/fail_futex/ignore-private", "N", false},
{"/sys/kernel/debug/fail_page_alloc/ignore-gfp-highmem", "N", false},
{"/sys/kernel/debug/fail_page_alloc/ignore-gfp-wait", "N", false},
{"/sys/kernel/debug/fail_page_alloc/min-order", "0", false},
};
unsigned i;
for (i = 0; i < sizeof(files) / sizeof(files[0]); i++) {
if (!write_file(files[i].file, files[i].val)) {
debug("failed to write %s: %d\n", files[i].file, errno);
if (files[i].fatal)
fail("failed to write %s", files[i].file);
}
}
}
#endif
#if SYZ_EXECUTOR || SYZ_LEAK
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#define KMEMLEAK_FILE "/sys/kernel/debug/kmemleak"
static void setup_leak()
{
// Flush boot leaks.
if (!write_file(KMEMLEAK_FILE, "scan"))
fail("failed to write %s", KMEMLEAK_FILE);
sleep(5); // account for MSECS_MIN_AGE
if (!write_file(KMEMLEAK_FILE, "scan"))
fail("failed to write %s", KMEMLEAK_FILE);
if (!write_file(KMEMLEAK_FILE, "clear"))
fail("failed to write %s", KMEMLEAK_FILE);
}
#define SYZ_HAVE_LEAK_CHECK 1
#if SYZ_EXECUTOR
static void check_leaks(char** frames, int nframes)
#else
static void check_leaks(void)
#endif
{
int fd = open(KMEMLEAK_FILE, O_RDWR);
if (fd == -1)
fail("failed to open(\"%s\")", KMEMLEAK_FILE);
// KMEMLEAK has false positives. To mitigate most of them, it checksums
// potentially leaked objects, and reports them only on the next scan
// iff the checksum does not change. Because of that we do the following
// intricate dance:
// Scan, sleep, scan again. At this point we can get some leaks.
// If there are leaks, we sleep and scan again, this can remove
// false leaks. Then, read kmemleak again. If we get leaks now, then
// hopefully these are true positives during the previous testing cycle.
uint64 start = current_time_ms();
if (write(fd, "scan", 4) != 4)
fail("failed to write(%s, \"scan\")", KMEMLEAK_FILE);
sleep(1);
// Account for MSECS_MIN_AGE
// (1 second less because scanning will take at least a second).
while (current_time_ms() - start < 4 * 1000)
sleep(1);
if (write(fd, "scan", 4) != 4)
fail("failed to write(%s, \"scan\")", KMEMLEAK_FILE);
static char buf[128 << 10];
ssize_t n = read(fd, buf, sizeof(buf) - 1);
if (n < 0)
fail("failed to read(%s)", KMEMLEAK_FILE);
int nleaks = 0;
if (n != 0) {
sleep(1);
if (write(fd, "scan", 4) != 4)
fail("failed to write(%s, \"scan\")", KMEMLEAK_FILE);
if (lseek(fd, 0, SEEK_SET) < 0)
fail("failed to lseek(%s)", KMEMLEAK_FILE);
n = read(fd, buf, sizeof(buf) - 1);
if (n < 0)
fail("failed to read(%s)", KMEMLEAK_FILE);
buf[n] = 0;
char* pos = buf;
char* end = buf + n;
while (pos < end) {
char* next = strstr(pos + 1, "unreferenced object");
if (!next)
next = end;
char prev = *next;
*next = 0;
#if SYZ_EXECUTOR
int f;
for (f = 0; f < nframes; f++) {
if (strstr(pos, frames[f]))
break;
}
if (f != nframes) {
*next = prev;
pos = next;
continue;
}
#endif
// BUG in output should be recognized by manager.
fprintf(stderr, "BUG: memory leak\n%s\n", pos);
*next = prev;
pos = next;
nleaks++;
}
}
if (write(fd, "clear", 5) != 5)
fail("failed to write(%s, \"clear\")", KMEMLEAK_FILE);
close(fd);
if (nleaks)
doexit(1);
}
#endif
#if SYZ_EXECUTOR || SYZ_BINFMT_MISC
#include <fcntl.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/types.h>
static void setup_binfmt_misc()
{
if (mount(0, "/proc/sys/fs/binfmt_misc", "binfmt_misc", 0, 0)) {
debug("mount(binfmt_misc) failed: %d\n", errno);
}
write_file("/proc/sys/fs/binfmt_misc/register", ":syz0:M:0:\x01::./file0:");
write_file("/proc/sys/fs/binfmt_misc/register", ":syz1:M:1:\x02::./file0:POC");
}
#endif
#if SYZ_EXECUTOR || SYZ_KCSAN
#define KCSAN_DEBUGFS_FILE "/sys/kernel/debug/kcsan"
static void setup_kcsan()
{
if (!write_file(KCSAN_DEBUGFS_FILE, "on"))
fail("failed to enable KCSAN");
}
#if SYZ_EXECUTOR // currently only used by executor
static void setup_kcsan_filterlist(char** frames, int nframes, bool blacklist)
{
int fd = open(KCSAN_DEBUGFS_FILE, O_WRONLY);
if (fd == -1)
fail("failed to open(\"%s\")", KCSAN_DEBUGFS_FILE);
const char* const filtertype = blacklist ? "blacklist" : "whitelist";
printf("adding functions to KCSAN %s: ", filtertype);
dprintf(fd, "%s\n", filtertype);
for (int i = 0; i < nframes; ++i) {
printf("'%s' ", frames[i]);
dprintf(fd, "!%s\n", frames[i]);
}
printf("\n");
close(fd);
}
#define SYZ_HAVE_KCSAN 1
#endif
#endif