mirror of
https://github.com/FEX-Emu/linux.git
synced 2025-01-12 20:31:49 +00:00
5b52330bbf
What started as a rather straightforward race condition reported by Dmitry using the syzkaller fuzzer ended up revealing some major problems with how the audit subsystem managed its netlink sockets and its connection with the userspace audit daemon. Fixing this properly had quite the cascading effect and what we are left with is this rather large and complicated patch. My initial goal was to try and decompose this patch into multiple smaller patches, but the way these changes are intertwined makes it difficult to split these changes into meaningful pieces that don't break or somehow make things worse for the intermediate states.

The patch makes a number of changes, but the most significant are highlighted below:

* The auditd tracking variables, e.g. audit_sock, are now gone and replaced by an RCU/spin_lock protected variable auditd_conn which is a structure containing all of the auditd tracking information.

* We no longer track the auditd sock directly, instead we track it via the network namespace in which it resides and we use the audit socket associated with that namespace. In spirit, this is what the code was trying to do prior to this patch (at least I think that is what the original authors intended), but it was done rather poorly and added a layer of obfuscation that only masked the underlying problems.

* Big backlog queue cleanup, again. In v4.10 we made some pretty big changes to how the audit backlog queues work, here we haven't changed the queue design so much as cleaned up the implementation. Brought about by the locking changes, we've simplified kauditd_thread() quite a bit by consolidating the queue handling into a new helper function, kauditd_send_queue(), which allows us to eliminate a lot of very similar code and makes the looping logic in kauditd_thread() clearer.

* All netlink messages sent to auditd are now sent via auditd_send_unicast_skb(). Other than just making sense, this makes the lock handling easier.

* Change the audit_log_start() sleep behavior so that we never sleep on auditd events (unchanged) or if the caller is holding the audit_cmd_mutex (changed). Previously we didn't sleep if the caller was auditd or if the message type fell between a certain range; the type check was a poor effort of doing what the cmd_mutex check now does. Richard Guy Briggs originally proposed not sleeping the cmd_mutex owner several years ago but his patch wasn't acceptable at the time. At least the idea lives on here.

* A problem with the lost record counter has been resolved. Steve Grubb and I both happened to notice this problem and according to some quick testing by Steve, this problem goes back quite some time. It's largely a harmless problem, although it may have left some careful sysadmins quite puzzled.

Cc: <stable@vger.kernel.org> # 4.10.x-
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
351 lines
11 KiB
C
351 lines
11 KiB
C
/* audit -- definition of audit_context structure and supporting types
|
|
*
|
|
* Copyright 2003-2004 Red Hat, Inc.
|
|
* Copyright 2005 Hewlett-Packard Development Company, L.P.
|
|
* Copyright 2005 IBM Corporation
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
*/
|
|
|
|
#include <linux/fs.h>
|
|
#include <linux/audit.h>
|
|
#include <linux/skbuff.h>
|
|
#include <uapi/linux/mqueue.h>
|
|
#include <linux/tty.h>
|
|
|
|
/* AUDIT_NAMES is the number of slots we reserve in the audit_context
|
|
* for saving names from getname(). If we get more names we will allocate
|
|
* a name dynamically and also add those to the list anchored by names_list. */
|
|
#define AUDIT_NAMES 5
|
|
|
|
/* At task start time, the audit_state is set in the audit_context using
|
|
a per-task filter. At syscall entry, the audit_state is augmented by
|
|
the syscall filter. */
|
|
/*
 * Per-task audit state.  Enumerators carry no explicit values, so they are
 * numbered 0, 1, 2 in declaration order.
 */
enum audit_state {
	AUDIT_DISABLED,		/* Do not create per-task audit_context.
				 * No syscall-specific audit records can
				 * be generated. */
	AUDIT_BUILD_CONTEXT,	/* Create the per-task audit_context,
				 * and fill it in at syscall
				 * entry time.  This makes a full
				 * syscall record available if some
				 * other part of the kernel decides it
				 * should be recorded. */
	AUDIT_RECORD_CONTEXT	/* Create the per-task audit_context,
				 * always fill it in at syscall entry
				 * time, and always write out the audit
				 * record at syscall exit time.  */
};
|
|
|
|
/* Rule lists */
|
|
struct audit_watch;
|
|
struct audit_fsnotify_mark;
|
|
struct audit_tree;
|
|
struct audit_chunk;
|
|
|
|
struct audit_entry {
|
|
struct list_head list;
|
|
struct rcu_head rcu;
|
|
struct audit_krule rule;
|
|
};
|
|
|
|
struct audit_cap_data {
|
|
kernel_cap_t permitted;
|
|
kernel_cap_t inheritable;
|
|
union {
|
|
unsigned int fE; /* effective bit of file cap */
|
|
kernel_cap_t effective; /* effective set of process */
|
|
};
|
|
};
|
|
|
|
/* When fs/namei.c:getname() is called, we store the pointer in name and bump
|
|
* the refcnt in the associated filename struct.
|
|
*
|
|
* Further, in fs/namei.c:path_lookup() we store the inode and device.
|
|
*/
|
|
struct audit_names {
|
|
struct list_head list; /* audit_context->names_list */
|
|
|
|
struct filename *name;
|
|
int name_len; /* number of chars to log */
|
|
bool hidden; /* don't log this record */
|
|
|
|
unsigned long ino;
|
|
dev_t dev;
|
|
umode_t mode;
|
|
kuid_t uid;
|
|
kgid_t gid;
|
|
dev_t rdev;
|
|
u32 osid;
|
|
struct audit_cap_data fcap;
|
|
unsigned int fcap_ver;
|
|
unsigned char type; /* record type */
|
|
/*
|
|
* This was an allocated audit_names and not from the array of
|
|
* names allocated in the task audit context. Thus this name
|
|
* should be freed on syscall exit.
|
|
*/
|
|
bool should_free;
|
|
};
|
|
|
|
/* Saved process title: a length plus a pointer to the cmdline bytes. */
struct audit_proctitle {
	int	len;	/* length of the cmdline field. */
	char	*value;	/* the cmdline field */
};
|
|
|
|
/* The per-task audit context. */
|
|
struct audit_context {
|
|
int dummy; /* must be the first element */
|
|
int in_syscall; /* 1 if task is in a syscall */
|
|
enum audit_state state, current_state;
|
|
unsigned int serial; /* serial number for record */
|
|
int major; /* syscall number */
|
|
struct timespec ctime; /* time of syscall entry */
|
|
unsigned long argv[4]; /* syscall arguments */
|
|
long return_code;/* syscall return code */
|
|
u64 prio;
|
|
int return_valid; /* return code is valid */
|
|
/*
|
|
* The names_list is the list of all audit_names collected during this
|
|
* syscall. The first AUDIT_NAMES entries in the names_list will
|
|
* actually be from the preallocated_names array for performance
|
|
* reasons. Except during allocation they should never be referenced
|
|
* through the preallocated_names array and should only be found/used
|
|
* by running the names_list.
|
|
*/
|
|
struct audit_names preallocated_names[AUDIT_NAMES];
|
|
int name_count; /* total records in names_list */
|
|
struct list_head names_list; /* struct audit_names->list anchor */
|
|
char *filterkey; /* key for rule that triggered record */
|
|
struct path pwd;
|
|
struct audit_aux_data *aux;
|
|
struct audit_aux_data *aux_pids;
|
|
struct sockaddr_storage *sockaddr;
|
|
size_t sockaddr_len;
|
|
/* Save things to print about task_struct */
|
|
pid_t pid, ppid;
|
|
kuid_t uid, euid, suid, fsuid;
|
|
kgid_t gid, egid, sgid, fsgid;
|
|
unsigned long personality;
|
|
int arch;
|
|
|
|
pid_t target_pid;
|
|
kuid_t target_auid;
|
|
kuid_t target_uid;
|
|
unsigned int target_sessionid;
|
|
u32 target_sid;
|
|
char target_comm[TASK_COMM_LEN];
|
|
|
|
struct audit_tree_refs *trees, *first_trees;
|
|
struct list_head killed_trees;
|
|
int tree_count;
|
|
|
|
int type;
|
|
union {
|
|
struct {
|
|
int nargs;
|
|
long args[6];
|
|
} socketcall;
|
|
struct {
|
|
kuid_t uid;
|
|
kgid_t gid;
|
|
umode_t mode;
|
|
u32 osid;
|
|
int has_perm;
|
|
uid_t perm_uid;
|
|
gid_t perm_gid;
|
|
umode_t perm_mode;
|
|
unsigned long qbytes;
|
|
} ipc;
|
|
struct {
|
|
mqd_t mqdes;
|
|
struct mq_attr mqstat;
|
|
} mq_getsetattr;
|
|
struct {
|
|
mqd_t mqdes;
|
|
int sigev_signo;
|
|
} mq_notify;
|
|
struct {
|
|
mqd_t mqdes;
|
|
size_t msg_len;
|
|
unsigned int msg_prio;
|
|
struct timespec abs_timeout;
|
|
} mq_sendrecv;
|
|
struct {
|
|
int oflag;
|
|
umode_t mode;
|
|
struct mq_attr attr;
|
|
} mq_open;
|
|
struct {
|
|
pid_t pid;
|
|
struct audit_cap_data cap;
|
|
} capset;
|
|
struct {
|
|
int fd;
|
|
int flags;
|
|
} mmap;
|
|
struct {
|
|
int argc;
|
|
} execve;
|
|
struct {
|
|
char *name;
|
|
} module;
|
|
};
|
|
int fds[2];
|
|
struct audit_proctitle proctitle;
|
|
};
|
|
|
|
extern u32 audit_ever_enabled;
|
|
|
|
extern void audit_copy_inode(struct audit_names *name,
|
|
const struct dentry *dentry,
|
|
struct inode *inode);
|
|
extern void audit_log_cap(struct audit_buffer *ab, char *prefix,
|
|
kernel_cap_t *cap);
|
|
extern void audit_log_name(struct audit_context *context,
|
|
struct audit_names *n, const struct path *path,
|
|
int record_num, int *call_panic);
|
|
|
|
extern int auditd_test_task(const struct task_struct *task);
|
|
|
|
#define AUDIT_INODE_BUCKETS 32
|
|
extern struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS];
|
|
|
|
/*
 * Reduce an inode number to a bucket index by masking it with
 * AUDIT_INODE_BUCKETS - 1.  The result lies in [0, AUDIT_INODE_BUCKETS)
 * provided AUDIT_INODE_BUCKETS is a power of two.
 *
 * @ino: inode number (only the low 32 bits are taken by this helper)
 *
 * Returns the bucket index; presumably used to index audit_inode_hash[],
 * which is declared with AUDIT_INODE_BUCKETS entries.
 */
static inline int audit_hash_ino(u32 ino)
{
	return (ino & (AUDIT_INODE_BUCKETS-1));
}
|
|
|
|
/* Indicates that audit should log the full pathname. */
|
|
#define AUDIT_NAME_FULL -1
|
|
|
|
extern int audit_match_class(int class, unsigned syscall);
|
|
extern int audit_comparator(const u32 left, const u32 op, const u32 right);
|
|
extern int audit_uid_comparator(kuid_t left, u32 op, kuid_t right);
|
|
extern int audit_gid_comparator(kgid_t left, u32 op, kgid_t right);
|
|
extern int parent_len(const char *path);
|
|
extern int audit_compare_dname_path(const char *dname, const char *path, int plen);
|
|
extern struct sk_buff *audit_make_reply(__u32 portid, int seq, int type,
|
|
int done, int multi,
|
|
const void *payload, int size);
|
|
extern void audit_panic(const char *message);
|
|
|
|
struct audit_netlink_list {
|
|
__u32 portid;
|
|
struct net *net;
|
|
struct sk_buff_head q;
|
|
};
|
|
|
|
int audit_send_list(void *);
|
|
|
|
extern int selinux_audit_rule_update(void);
|
|
|
|
extern struct mutex audit_filter_mutex;
|
|
extern int audit_del_rule(struct audit_entry *);
|
|
extern void audit_free_rule_rcu(struct rcu_head *);
|
|
extern struct list_head audit_filter_list[];
|
|
|
|
extern struct audit_entry *audit_dupe_rule(struct audit_krule *old);
|
|
|
|
extern void audit_log_d_path_exe(struct audit_buffer *ab,
|
|
struct mm_struct *mm);
|
|
|
|
extern struct tty_struct *audit_get_tty(struct task_struct *tsk);
|
|
extern void audit_put_tty(struct tty_struct *tty);
|
|
|
|
/* audit watch functions */
|
|
#ifdef CONFIG_AUDIT_WATCH
|
|
extern void audit_put_watch(struct audit_watch *watch);
|
|
extern void audit_get_watch(struct audit_watch *watch);
|
|
extern int audit_to_watch(struct audit_krule *krule, char *path, int len, u32 op);
|
|
extern int audit_add_watch(struct audit_krule *krule, struct list_head **list);
|
|
extern void audit_remove_watch_rule(struct audit_krule *krule);
|
|
extern char *audit_watch_path(struct audit_watch *watch);
|
|
extern int audit_watch_compare(struct audit_watch *watch, unsigned long ino, dev_t dev);
|
|
|
|
extern struct audit_fsnotify_mark *audit_alloc_mark(struct audit_krule *krule, char *pathname, int len);
|
|
extern char *audit_mark_path(struct audit_fsnotify_mark *mark);
|
|
extern void audit_remove_mark(struct audit_fsnotify_mark *audit_mark);
|
|
extern void audit_remove_mark_rule(struct audit_krule *krule);
|
|
extern int audit_mark_compare(struct audit_fsnotify_mark *mark, unsigned long ino, dev_t dev);
|
|
extern int audit_dupe_exe(struct audit_krule *new, struct audit_krule *old);
|
|
extern int audit_exe_compare(struct task_struct *tsk, struct audit_fsnotify_mark *mark);
|
|
|
|
#else
|
|
/*
 * CONFIG_AUDIT_WATCH disabled: stub out the watch/fsnotify-mark API.
 *
 * Statement-like stubs expand to "do { } while (0)" so that a call followed
 * by ';' is exactly one statement and stays valid inside an unbraced
 * if/else.  Value-returning stubs expand to a parenthesized constant.
 */
#define audit_put_watch(w) do { } while (0)
#define audit_get_watch(w) do { } while (0)
#define audit_to_watch(k, p, l, o) (-EINVAL)
#define audit_add_watch(k, l) (-EINVAL)
#define audit_remove_watch_rule(k) BUG()
#define audit_watch_path(w) ""
#define audit_watch_compare(w, i, d) 0

#define audit_alloc_mark(k, p, l) (ERR_PTR(-EINVAL))
#define audit_mark_path(m) ""
#define audit_remove_mark(m) do { } while (0)
#define audit_remove_mark_rule(k) do { } while (0)
#define audit_mark_compare(m, i, d) 0
#define audit_exe_compare(t, m) (-EINVAL)
#define audit_dupe_exe(n, o) (-EINVAL)
|
|
#endif /* CONFIG_AUDIT_WATCH */
|
|
|
|
#ifdef CONFIG_AUDIT_TREE
|
|
extern struct audit_chunk *audit_tree_lookup(const struct inode *);
|
|
extern void audit_put_chunk(struct audit_chunk *);
|
|
extern bool audit_tree_match(struct audit_chunk *, struct audit_tree *);
|
|
extern int audit_make_tree(struct audit_krule *, char *, u32);
|
|
extern int audit_add_tree_rule(struct audit_krule *);
|
|
extern int audit_remove_tree_rule(struct audit_krule *);
|
|
extern void audit_trim_trees(void);
|
|
extern int audit_tag_tree(char *old, char *new);
|
|
extern const char *audit_tree_path(struct audit_tree *);
|
|
extern void audit_put_tree(struct audit_tree *);
|
|
extern void audit_kill_trees(struct list_head *);
|
|
#else
|
|
/*
 * CONFIG_AUDIT_TREE disabled: stub out the tree API.  Expansions are
 * parenthesized so they behave as a single operand wherever they are used,
 * matching the (-EINVAL) form of the CONFIG_AUDIT_WATCH stubs.
 */
#define audit_remove_tree_rule(rule) BUG()
#define audit_add_tree_rule(rule) (-EINVAL)
#define audit_make_tree(rule, str, op) (-EINVAL)
#define audit_trim_trees() ((void)0)
#define audit_put_tree(tree) ((void)0)
#define audit_tag_tree(old, new) (-EINVAL)
#define audit_tree_path(rule) ""	/* never called */
#define audit_kill_trees(list) BUG()
|
|
#endif
|
|
|
|
extern char *audit_unpack_string(void **, size_t *, size_t);
|
|
|
|
extern pid_t audit_sig_pid;
|
|
extern kuid_t audit_sig_uid;
|
|
extern u32 audit_sig_sid;
|
|
|
|
extern int audit_filter(int msgtype, unsigned int listtype);
|
|
|
|
#ifdef CONFIG_AUDITSYSCALL
|
|
extern int __audit_signal_info(int sig, struct task_struct *t);
|
|
static inline int audit_signal_info(int sig, struct task_struct *t)
|
|
{
|
|
if (auditd_test_task(t) || (audit_signals && !audit_dummy_context()))
|
|
return __audit_signal_info(sig, t);
|
|
return 0;
|
|
}
|
|
extern void audit_filter_inodes(struct task_struct *, struct audit_context *);
|
|
extern struct list_head *audit_killed_trees(void);
|
|
#else
|
|
/*
 * CONFIG_AUDITSYSCALL disabled: both helpers ignore their arguments and
 * evaluate to AUDIT_DISABLED (the first enumerator of enum audit_state).
 */
#define audit_signal_info(s,t) AUDIT_DISABLED
#define audit_filter_inodes(t,c) AUDIT_DISABLED
|
|
#endif
|
|
|
|
extern struct mutex audit_cmd_mutex;
|