linux/lib
Christoph Lameter 819a72af8d percpucounter: Optimize __percpu_counter_add a bit through the use of this_cpu() options.
The this_cpu_* options can be used to optimize __percpu_counter_add a bit. Avoids
some address arithmetic and saves 12 bytes.

Before:


00000000000001d3 <__percpu_counter_add>:
 1d3:	55                   	push   %rbp
 1d4:	48 89 e5             	mov    %rsp,%rbp
 1d7:	41 55                	push   %r13
 1d9:	41 54                	push   %r12
 1db:	53                   	push   %rbx
 1dc:	48 89 fb             	mov    %rdi,%rbx
 1df:	48 83 ec 08          	sub    $0x8,%rsp
 1e3:	4c 8b 67 30          	mov    0x30(%rdi),%r12
 1e7:	65 4c 03 24 25 00 00 	add    %gs:0x0,%r12
 1ee:	00 00
 1f0:	4d 63 2c 24          	movslq (%r12),%r13
 1f4:	48 63 c2             	movslq %edx,%rax
 1f7:	49 01 f5             	add    %rsi,%r13
 1fa:	49 39 c5             	cmp    %rax,%r13
 1fd:	7d 0a                	jge    209 <__percpu_counter_add+0x36>
 1ff:	f7 da                	neg    %edx
 201:	48 63 d2             	movslq %edx,%rdx
 204:	49 39 d5             	cmp    %rdx,%r13
 207:	7f 1e                	jg     227 <__percpu_counter_add+0x54>
 209:	48 89 df             	mov    %rbx,%rdi
 20c:	e8 00 00 00 00       	callq  211 <__percpu_counter_add+0x3e>
 211:	4c 01 6b 18          	add    %r13,0x18(%rbx)
 215:	48 89 df             	mov    %rbx,%rdi
 218:	41 c7 04 24 00 00 00 	movl   $0x0,(%r12)
 21f:	00
 220:	e8 00 00 00 00       	callq  225 <__percpu_counter_add+0x52>
 225:	eb 04                	jmp    22b <__percpu_counter_add+0x58>
 227:	45 89 2c 24          	mov    %r13d,(%r12)
 22b:	5b                   	pop    %rbx
 22c:	5b                   	pop    %rbx
 22d:	41 5c                	pop    %r12
 22f:	41 5d                	pop    %r13
 231:	c9                   	leaveq
 232:	c3                   	retq


After:

00000000000001d3 <__percpu_counter_add>:
 1d3:	55                   	push   %rbp
 1d4:	48 63 ca             	movslq %edx,%rcx
 1d7:	48 89 e5             	mov    %rsp,%rbp
 1da:	41 54                	push   %r12
 1dc:	53                   	push   %rbx
 1dd:	48 89 fb             	mov    %rdi,%rbx
 1e0:	48 8b 47 30          	mov    0x30(%rdi),%rax
 1e4:	65 44 8b 20          	mov    %gs:(%rax),%r12d
 1e8:	4d 63 e4             	movslq %r12d,%r12
 1eb:	49 01 f4             	add    %rsi,%r12
 1ee:	49 39 cc             	cmp    %rcx,%r12
 1f1:	7d 0a                	jge    1fd <__percpu_counter_add+0x2a>
 1f3:	f7 da                	neg    %edx
 1f5:	48 63 d2             	movslq %edx,%rdx
 1f8:	49 39 d4             	cmp    %rdx,%r12
 1fb:	7f 21                	jg     21e <__percpu_counter_add+0x4b>
 1fd:	48 89 df             	mov    %rbx,%rdi
 200:	e8 00 00 00 00       	callq  205 <__percpu_counter_add+0x32>
 205:	4c 01 63 18          	add    %r12,0x18(%rbx)
 209:	48 8b 43 30          	mov    0x30(%rbx),%rax
 20d:	48 89 df             	mov    %rbx,%rdi
 210:	65 c7 00 00 00 00 00 	movl   $0x0,%gs:(%rax)
 217:	e8 00 00 00 00       	callq  21c <__percpu_counter_add+0x49>
 21c:	eb 04                	jmp    222 <__percpu_counter_add+0x4f>
 21e:	65 44 89 20          	mov    %r12d,%gs:(%rax)
 222:	5b                   	pop    %rbx
 223:	41 5c                	pop    %r12
 225:	c9                   	leaveq
 226:	c3                   	retq

Reviewed-by: Pekka Enberg <penberg@kernel.org>
Reviewed-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Acked-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2010-12-17 15:07:18 +01:00
..
lzo
raid6 Move .gitignore from drivers/md to lib/raid6 2010-08-30 17:35:52 +10:00
reed_solomon
zlib_deflate
zlib_inflate
.gitignore
argv_split.c
atomic64_test.c ARM: 6213/1: atomic64_test: add ARM as supported architecture 2010-07-27 10:43:46 +01:00
atomic64.c
audit.c
bcd.c
bitmap.c lib/bitmap.c: use hex_to_bin() 2010-10-26 16:52:18 -07:00
bitrev.c
btree.c lib/btree: fix possible NULL pointer dereference 2010-05-15 12:48:10 -07:00
bug.c modules: Fix module_bug_list list corruption race 2010-10-05 11:29:27 -07:00
bust_spinlocks.c
check_signature.c
checksum.c
cmdline.c
cpu-notifier-error-inject.c fault-injection: add CPU notifier error injection module 2010-05-27 09:12:48 -07:00
cpumask.c include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h 2010-03-30 22:02:32 +09:00
crc7.c
crc16.c
crc32.c revert "crc32: use __BYTE_ORDER macro for endian detection" 2010-05-26 08:19:23 -07:00
crc32defs.h
crc-ccitt.c
crc-itu-t.c
crc-t10dif.c
ctype.c
debug_locks.c Revert "debug_locks: set oops_in_progress if we will log messages." 2010-11-29 15:18:28 -08:00
debugobjects.c Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip 2010-05-18 08:17:58 -07:00
dec_and_lock.c
decompress_bunzip2.c lib/decompress_bunzip2.c: fix checkstack warning 2010-08-11 08:59:23 -07:00
decompress_inflate.c
decompress_unlzma.c
decompress_unlzo.c lib: fix the use of LZO to decompress initramfs images 2010-04-24 11:31:25 -07:00
decompress.c
devres.c lib/devres.c: fix comment typo 2010-07-11 22:16:32 +02:00
div64.c div64_u64(): improve precision on 32bit platforms 2010-10-26 16:52:19 -07:00
dma-debug.c llseek: automatically add .llseek fop 2010-10-15 15:53:27 +02:00
dump_stack.c
dynamic_debug.c Merge git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core-2.6 2010-10-22 19:36:42 -07:00
extable.c
fault-inject.c
find_last_bit.c
find_next_bit.c
flex_array.c flex_array: add helpers to get and put to make pointers easy to use 2010-08-09 20:45:09 -07:00
gcd.c
gen_crc32table.c crc32: major optimization 2010-05-25 08:07:06 -07:00
genalloc.c genalloc: fix allocation from end of pool 2010-06-29 15:29:30 -07:00
halfmd4.c
hexdump.c lib: introduce common method to convert hex digits 2010-05-25 08:07:05 -07:00
hweight.c x86: Add optimized popcnt variants 2010-04-06 15:52:11 -07:00
idr.c docbook: add idr/ida to kernel-api docbook 2010-10-26 17:40:56 -07:00
inflate.c MN10300: Don't try and #include <linux/slab.h> in lib/inflate.c from bootloader 2010-08-12 09:51:35 -07:00
int_sqrt.c
iomap_copy.c
iomap.c
iommu-helper.c iommu: inline iommu_num_pages 2010-08-09 20:45:05 -07:00
ioremap.c x86, ioremap: Fix incorrect physical address handling in PAE mode 2010-07-09 11:42:03 -07:00
irq_regs.c
is_single_threaded.c
kasprintf.c include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h 2010-03-30 22:02:32 +09:00
Kconfig Merge branch 'async' of macbook:git/btrfs-unstable 2010-08-09 10:36:44 +01:00
Kconfig.debug Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/djbw/async_tx 2010-10-27 19:04:36 -07:00
Kconfig.kgdb mips,kgdb: kdb low level trap catch and stack trace 2010-05-20 21:04:26 -05:00
Kconfig.kmemcheck
kernel_lock.c
klist.c
kobject_uevent.c kobject_uevent: fix typo in comments 2010-08-23 18:12:46 -07:00
kobject.c kobject: Introduce kset_find_obj_hinted. 2010-10-22 10:16:44 -07:00
kref.c kref: remove kref_set 2010-05-21 09:37:29 -07:00
lcm.c
libcrc32c.c
list_debug.c list debugging: warn when deleting a deleted entry 2010-08-09 20:45:08 -07:00
list_sort.c lib/list_sort: test: check element addresses 2010-10-26 16:52:19 -07:00
locking-selftest-hardirq.h
locking-selftest-mutex.h
locking-selftest-rlock-hardirq.h
locking-selftest-rlock-softirq.h
locking-selftest-rlock.h
locking-selftest-rsem.h
locking-selftest-softirq.h
locking-selftest-spin-hardirq.h
locking-selftest-spin-softirq.h
locking-selftest-spin.h
locking-selftest-wlock-hardirq.h
locking-selftest-wlock-softirq.h
locking-selftest-wlock.h
locking-selftest-wsem.h
locking-selftest.c
lru_cache.c
Makefile Merge branch 'async' of macbook:git/btrfs-unstable 2010-08-09 10:36:44 +01:00
nlattr.c
parser.c lib/parser: cleanup match_number() 2010-10-26 16:52:19 -07:00
percpu_counter.c percpucounter: Optimize __percpu_counter_add a bit through the use of this_cpu() options. 2010-12-17 15:07:18 +01:00
plist.c
prio_heap.c
prio_tree.c
proportions.c
radix-tree.c radix-tree: fix RCU bug 2010-11-12 07:55:32 -08:00
random32.c Merge branch 'master' into for-next 2010-06-16 18:08:13 +02:00
ratelimit.c ratelimit: fix the return value when __ratelimit() fails to acquire the lock 2010-04-07 08:38:04 -07:00
rational.c
rbtree.c rbtree: Undo augmented trees performance damage and regression 2010-07-05 14:43:50 +02:00
reciprocal_div.c
rwsem-spinlock.c rwsem generic spinlock: use IRQ save/restore spinlocks 2010-04-07 16:15:05 -07:00
rwsem.c rwsem: smaller wrappers around rwsem_down_failed_common 2010-08-09 20:45:11 -07:00
scatterlist.c scatterlist: prevent invalid free when alloc fails 2010-08-30 19:55:09 +02:00
sha1.c
show_mem.c
smp_processor_id.c
sort.c
spinlock_debug.c
string_helpers.c
string.c
swiotlb.c swiotlb: Use page alignment for early buffer allocation 2010-10-11 17:08:36 -04:00
syscall.c
textsearch.c include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h 2010-03-30 22:02:32 +09:00
ts_bm.c
ts_fsm.c
ts_kmp.c
uuid.c Unified UUID/GUID definition 2010-05-19 22:40:47 -04:00
vsprintf.c lib: fix scnprintf() if @size is == 0 2010-10-26 16:52:16 -07:00