diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 43083afffe0f..06a6fba9f531 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -73,15 +73,6 @@ enum sw_event_ids { PERF_SW_EVENTS_MAX = 7, }; -/* - * IRQ-notification data record type: - */ -enum perf_counter_record_type { - PERF_RECORD_SIMPLE = 0, - PERF_RECORD_IRQ = 1, - PERF_RECORD_GROUP = 2, -}; - #define __PERF_COUNTER_MASK(name) \ (((1ULL << PERF_COUNTER_##name##_BITS) - 1) << \ PERF_COUNTER_##name##_SHIFT) @@ -102,6 +93,17 @@ enum perf_counter_record_type { #define PERF_COUNTER_EVENT_SHIFT 0 #define PERF_COUNTER_EVENT_MASK __PERF_COUNTER_MASK(EVENT) +/* + * Bits that can be set in hw_event.record_type to request information + * in the overflow packets. + */ +enum perf_counter_record_format { + PERF_RECORD_IP = 1U << 0, + PERF_RECORD_TID = 1U << 1, + PERF_RECORD_GROUP = 1U << 2, + PERF_RECORD_CALLCHAIN = 1U << 3, +}; + /* * Bits that can be set in hw_event.read_format to request that * reads on the counter should return the indicated quantities, @@ -125,8 +127,8 @@ struct perf_counter_hw_event { __u64 config; __u64 irq_period; - __u64 record_type; - __u64 read_format; + __u32 record_type; + __u32 read_format; __u64 disabled : 1, /* off by default */ nmi : 1, /* NMI sampling */ @@ -137,12 +139,10 @@ struct perf_counter_hw_event { exclude_kernel : 1, /* ditto kernel */ exclude_hv : 1, /* ditto hypervisor */ exclude_idle : 1, /* don't count when idle */ - include_tid : 1, /* include the tid */ mmap : 1, /* include mmap data */ munmap : 1, /* include munmap data */ - callchain : 1, /* add callchain data */ - __reserved_1 : 51; + __reserved_1 : 53; __u32 extra_config_len; __u32 __reserved_4; @@ -212,15 +212,21 @@ struct perf_event_header { enum perf_event_type { - PERF_EVENT_GROUP = 1, + PERF_EVENT_MMAP = 1, + PERF_EVENT_MUNMAP = 2, - PERF_EVENT_MMAP = 2, - PERF_EVENT_MUNMAP = 3, - - PERF_EVENT_OVERFLOW = 1UL << 31, - __PERF_EVENT_IP = 1UL << 30, - __PERF_EVENT_TID = 1UL << 29, - __PERF_EVENT_CALLCHAIN = 1UL << 28, + /* + * Half the event type space is reserved for the counter overflow + * bitfields, as found in hw_event.record_type. + * + * These events will have types of the form: + * PERF_EVENT_COUNTER_OVERFLOW { | __PERF_EVENT_* } * + */ + PERF_EVENT_COUNTER_OVERFLOW = 1UL << 31, + __PERF_EVENT_IP = PERF_RECORD_IP, + __PERF_EVENT_TID = PERF_RECORD_TID, + __PERF_EVENT_GROUP = PERF_RECORD_GROUP, + __PERF_EVENT_CALLCHAIN = PERF_RECORD_CALLCHAIN, }; #ifdef __KERNEL__ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 860cdc26bd7a..995063df910f 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1765,27 +1765,34 @@ static void perf_output_end(struct perf_output_handle *handle) rcu_read_unlock(); } -static void perf_output_simple(struct perf_counter *counter, - int nmi, struct pt_regs *regs) +void perf_counter_output(struct perf_counter *counter, + int nmi, struct pt_regs *regs) { int ret; + u64 record_type = counter->hw_event.record_type; struct perf_output_handle handle; struct perf_event_header header; u64 ip; struct { u32 pid, tid; } tid_entry; + struct { + u64 event; + u64 counter; + } group_entry; struct perf_callchain_entry *callchain = NULL; int callchain_size = 0; - header.type = PERF_EVENT_OVERFLOW; + header.type = PERF_EVENT_COUNTER_OVERFLOW; header.size = sizeof(header); - ip = instruction_pointer(regs); - header.type |= __PERF_EVENT_IP; - header.size += sizeof(ip); + if (record_type & PERF_RECORD_IP) { + ip = instruction_pointer(regs); + header.type |= __PERF_EVENT_IP; + header.size += sizeof(ip); + } - if (counter->hw_event.include_tid) { + if (record_type & PERF_RECORD_TID) { /* namespace issues */ tid_entry.pid = current->group_leader->pid; tid_entry.tid = current->pid; @@ -1794,7 +1801,13 @@ static void perf_output_simple(struct perf_counter *counter, header.size += sizeof(tid_entry); } - if (counter->hw_event.callchain) { + if (record_type & PERF_RECORD_GROUP) { + header.type |= __PERF_EVENT_GROUP; + header.size += sizeof(u64) + + counter->nr_siblings * sizeof(group_entry); + } + + if (record_type & PERF_RECORD_CALLCHAIN) { callchain = perf_callchain(regs); if (callchain) { @@ -1810,71 +1823,37 @@ static void perf_output_simple(struct perf_counter *counter, return; perf_output_put(&handle, header); - perf_output_put(&handle, ip); - if (counter->hw_event.include_tid) + if (record_type & PERF_RECORD_IP) + perf_output_put(&handle, ip); + + if (record_type & PERF_RECORD_TID) perf_output_put(&handle, tid_entry); + if (record_type & PERF_RECORD_GROUP) { + struct perf_counter *leader, *sub; + u64 nr = counter->nr_siblings; + + perf_output_put(&handle, nr); + + leader = counter->group_leader; + list_for_each_entry(sub, &leader->sibling_list, list_entry) { + if (sub != counter) + sub->hw_ops->read(sub); + + group_entry.event = sub->hw_event.config; + group_entry.counter = atomic64_read(&sub->count); + + perf_output_put(&handle, group_entry); + } + } + if (callchain) perf_output_copy(&handle, callchain, callchain_size); perf_output_end(&handle); } -static void perf_output_group(struct perf_counter *counter, int nmi) -{ - struct perf_output_handle handle; - struct perf_event_header header; - struct perf_counter *leader, *sub; - unsigned int size; - struct { - u64 event; - u64 counter; - } entry; - int ret; - - size = sizeof(header) + counter->nr_siblings * sizeof(entry); - - ret = perf_output_begin(&handle, counter, size, nmi); - if (ret) - return; - - header.type = PERF_EVENT_GROUP; - header.size = size; - - perf_output_put(&handle, header); - - leader = counter->group_leader; - list_for_each_entry(sub, &leader->sibling_list, list_entry) { - if (sub != counter) - sub->hw_ops->read(sub); - - entry.event = sub->hw_event.config; - entry.counter = atomic64_read(&sub->count); - - perf_output_put(&handle, entry); - } - - perf_output_end(&handle); -} - -void perf_counter_output(struct perf_counter *counter, - int nmi, struct pt_regs *regs) -{ - switch (counter->hw_event.record_type) { - case PERF_RECORD_SIMPLE: - return; - - case PERF_RECORD_IRQ: - perf_output_simple(counter, nmi, regs); - break; - - case PERF_RECORD_GROUP: - perf_output_group(counter, nmi); - break; - } -} - /* * mmap tracking */