mirror of
https://github.com/followmsi/android_kernel_google_msm.git
synced 2024-11-06 23:17:41 +00:00
da5edc8231
Optimize the current version of the shift-and-subtract (hardware) algorithm, described by John von Newmann[1] and Guy L Steele. Iterating 1,000,000 times, perf shows for the current version: Performance counter stats for './sqrt-curr' (10 runs): 27.170996 task-clock # 0.979 CPUs utilized ( +- 3.19% ) 3 context-switches # 0.103 K/sec ( +- 4.76% ) 0 cpu-migrations # 0.004 K/sec ( +-100.00% ) 104 page-faults # 0.004 M/sec ( +- 0.16% ) 64,921,199 cycles # 2.389 GHz ( +- 0.03% ) 28,967,789 stalled-cycles-frontend # 44.62% frontend cycles idle ( +- 0.18% ) <not supported> stalled-cycles-backend 104,502,623 instructions # 1.61 insns per cycle # 0.28 stalled cycles per insn ( +- 0.00% ) 34,088,368 branches # 1254.587 M/sec ( +- 0.00% ) 4,901 branch-misses # 0.01% of all branches ( +- 1.32% ) 0.027763015 seconds time elapsed ( +- 3.22% ) And for the new version: Performance counter stats for './sqrt-new' (10 runs): 0.496869 task-clock # 0.519 CPUs utilized ( +- 2.38% ) 0 context-switches # 0.000 K/sec 0 cpu-migrations # 0.403 K/sec ( +-100.00% ) 104 page-faults # 0.209 M/sec ( +- 0.15% ) 590,760 cycles # 1.189 GHz ( +- 2.35% ) 395,053 stalled-cycles-frontend # 66.87% frontend cycles idle ( +- 3.67% ) <not supported> stalled-cycles-backend 398,963 instructions # 0.68 insns per cycle # 0.99 stalled cycles per insn ( +- 0.39% ) 70,228 branches # 141.341 M/sec ( +- 0.36% ) 3,364 branch-misses # 4.79% of all branches ( +- 5.45% ) 0.000957440 seconds time elapsed ( +- 2.42% ) Furthermore, this saves space in instruction text: text data bss dec hex filename 111 0 0 111 6f lib/int_sqrt-baseline.o 89 0 0 89 59 lib/int_sqrt.o [1] http://en.wikipedia.org/wiki/First_Draft_of_a_Report_on_the_EDVAC Signed-off-by: Davidlohr Bueso <davidlohr.bueso@hp.com> Reviewed-by: Jonathan Gonzalez <jgonzlez@linets.cl> Tested-by: Jonathan Gonzalez <jgonzlez@linets.cl> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: flar2 <asegaert@gmail.com> |
||
---|---|---|
.. | ||
lz4 | ||
lzo | ||
mpi | ||
raid6 | ||
reed_solomon | ||
xz | ||
zlib_deflate | ||
zlib_inflate | ||
.gitignore | ||
argv_split.c | ||
atomic64.c | ||
atomic64_test.c | ||
audit.c | ||
average.c | ||
bcd.c | ||
bch.c | ||
bitmap.c | ||
bitrev.c | ||
bsearch.c | ||
btree.c | ||
bug.c | ||
bust_spinlocks.c | ||
check_signature.c | ||
checksum.c | ||
clz_tab.c | ||
cmdline.c | ||
cordic.c | ||
cpu-notifier-error-inject.c | ||
cpu_rmap.c | ||
cpumask.c | ||
crc-ccitt.c | ||
crc-itu-t.c | ||
crc-t10dif.c | ||
crc7.c | ||
crc8.c | ||
crc16.c | ||
crc32.c | ||
crc32defs.h | ||
ctype.c | ||
debug_locks.c | ||
debugobjects.c | ||
dec_and_lock.c | ||
decompress.c | ||
decompress_bunzip2.c | ||
decompress_inflate.c | ||
decompress_unlzma.c | ||
decompress_unlzo.c | ||
decompress_unxz.c | ||
devres.c | ||
digsig.c | ||
div64.c | ||
dma-debug.c | ||
dump_stack.c | ||
dynamic_debug.c | ||
dynamic_queue_limits.c | ||
extable.c | ||
fault-inject.c | ||
find_last_bit.c | ||
find_next_bit.c | ||
flex_array.c | ||
gcd.c | ||
gen_crc32table.c | ||
genalloc.c | ||
halfmd4.c | ||
hexdump.c | ||
hweight.c | ||
idr.c | ||
inflate.c | ||
int_sqrt.c | ||
iomap.c | ||
iomap_copy.c | ||
iommu-helper.c | ||
ioremap.c | ||
irq_regs.c | ||
is_single_threaded.c | ||
kasprintf.c | ||
Kconfig | ||
Kconfig.debug | ||
Kconfig.kgdb | ||
Kconfig.kmemcheck | ||
klist.c | ||
kobject.c | ||
kobject_uevent.c | ||
kstrtox.c | ||
kstrtox.h | ||
lcm.c | ||
libcrc32c.c | ||
list_debug.c | ||
list_sort.c | ||
llist.c | ||
locking-selftest-hardirq.h | ||
locking-selftest-mutex.h | ||
locking-selftest-rlock-hardirq.h | ||
locking-selftest-rlock-softirq.h | ||
locking-selftest-rlock.h | ||
locking-selftest-rsem.h | ||
locking-selftest-softirq.h | ||
locking-selftest-spin-hardirq.h | ||
locking-selftest-spin-softirq.h | ||
locking-selftest-spin.h | ||
locking-selftest-wlock-hardirq.h | ||
locking-selftest-wlock-softirq.h | ||
locking-selftest-wlock.h | ||
locking-selftest-wsem.h | ||
locking-selftest.c | ||
lru_cache.c | ||
Makefile | ||
md5.c | ||
memory_alloc.c | ||
nlattr.c | ||
parser.c | ||
pci_iomap.c | ||
percpu_counter.c | ||
plist.c | ||
prio_heap.c | ||
prio_tree.c | ||
proportions.c | ||
radix-tree.c | ||
random32.c | ||
ratelimit.c | ||
rational.c | ||
rbtree.c | ||
reciprocal_div.c | ||
rwsem-spinlock.c | ||
rwsem.c | ||
scatterlist.c | ||
sha1.c | ||
show_mem.c | ||
smp_processor_id.c | ||
sort.c | ||
spinlock_debug.c | ||
string.c | ||
string_helpers.c | ||
swiotlb.c | ||
syscall.c | ||
test-kstrtox.c | ||
textsearch.c | ||
timerqueue.c | ||
ts_bm.c | ||
ts_fsm.c | ||
ts_kmp.c | ||
uuid.c | ||
vsprintf.c |