mirror of
https://github.com/team-infusion-developers/android_kernel_samsung_msm8976.git
synced 2024-11-01 02:21:16 +00:00
5881a5ab50
commit 3f3295709edea6268ff1609855f498035286af73 upstream.
The current int_sqrt() computation is sub-optimal for the case of small
@x. Which is the interesting case when we're going to do cumulative
distribution functions on idle times, which we assume to be a random
variable, where the target residency of the deepest idle state gives an
upper bound on the variable (5e6ns on recent Intel chips).
In the case of small @x, the compute loop:
while (m != 0) {
b = y + m;
y >>= 1;
if (x >= b) {
x -= b;
y += m;
}
m >>= 2;
}
can be reduced to:
while (m > x)
m >>= 2;
Because y==0, b==m and until x>=m y will remain 0.
And while this is computationally equivalent, it runs much faster
because there's less code, in particular less branches.
cycles: branches: branch-misses:
OLD:
hot: 45.109444 +- 0.044117 44.333392 +- 0.002254 0.018723 +- 0.000593
cold: 187.737379 +- 0.156678 44.333407 +- 0.002254 6.272844 +- 0.004305
PRE:
hot: 67.937492 +- 0.064124 66.999535 +- 0.000488 0.066720 +- 0.001113
cold: 232.004379 +- 0.332811 66.999527 +- 0.000488 6.914634 +- 0.006568
POST:
hot: 43.633557 +- 0.034373 45.333132 +- 0.002277 0.023529 +- 0.000681
cold: 207.438411 +- 0.125840 45.333132 +- 0.002277 6.976486 +- 0.004219
Averages computed over all values <128k using a LFSR to generate order.
Cold numbers have a LFSR based branch trace buffer 'confuser' ran between
each int_sqrt() invocation.
Link: http://lkml.kernel.org/r/20171020164644.876503355@infradead.org
Fixes:
|
||
---|---|---|
.. | ||
lz4 | ||
lzo | ||
mpi | ||
raid6 | ||
reed_solomon | ||
xz | ||
zlib_deflate | ||
zlib_inflate | ||
.gitignore | ||
argv_split.c | ||
asn1_decoder.c | ||
atomic64.c | ||
atomic64_test.c | ||
audit.c | ||
average.c | ||
bcd.c | ||
bch.c | ||
bitmap.c | ||
bitrev.c | ||
bsearch.c | ||
btree.c | ||
bug.c | ||
build_OID_registry | ||
bust_spinlocks.c | ||
check_signature.c | ||
checksum.c | ||
clz_tab.c | ||
cmdline.c | ||
cordic.c | ||
cpu-notifier-error-inject.c | ||
cpu_rmap.c | ||
cpumask.c | ||
crc-ccitt.c | ||
crc-itu-t.c | ||
crc-t10dif.c | ||
crc7.c | ||
crc8.c | ||
crc16.c | ||
crc32.c | ||
crc32defs.h | ||
ctype.c | ||
debug_locks.c | ||
debugobjects.c | ||
dec_and_lock.c | ||
decompress.c | ||
decompress_bunzip2.c | ||
decompress_inflate.c | ||
decompress_unlzma.c | ||
decompress_unlzo.c | ||
decompress_unxz.c | ||
devres.c | ||
digsig.c | ||
div64.c | ||
dma-debug.c | ||
dump_stack.c | ||
dynamic_debug.c | ||
dynamic_queue_limits.c | ||
earlycpio.c | ||
extable.c | ||
fault-inject.c | ||
fdt.c | ||
fdt_ro.c | ||
fdt_rw.c | ||
fdt_strerror.c | ||
fdt_sw.c | ||
fdt_wip.c | ||
find_last_bit.c | ||
find_next_bit.c | ||
flex_array.c | ||
flex_proportions.c | ||
gcd.c | ||
gen_crc32table.c | ||
genalloc.c | ||
halfmd4.c | ||
hexdump.c | ||
hweight.c | ||
idr.c | ||
inflate.c | ||
int_sqrt.c | ||
interval_tree.c | ||
interval_tree_test_main.c | ||
iomap.c | ||
iomap_copy.c | ||
iommu-helper.c | ||
ioremap.c | ||
iovec.c | ||
irq_regs.c | ||
is_single_threaded.c | ||
jedec_ddr_data.c | ||
kasprintf.c | ||
Kconfig | ||
Kconfig.debug | ||
Kconfig.kasan | ||
Kconfig.kgdb | ||
Kconfig.kmemcheck | ||
kfifo.c | ||
klist.c | ||
kobject.c | ||
kobject_uevent.c | ||
kstrtox.c | ||
kstrtox.h | ||
lcm.c | ||
libcrc32c.c | ||
list_debug.c | ||
list_sort.c | ||
llist.c | ||
locking-selftest-hardirq.h | ||
locking-selftest-mutex.h | ||
locking-selftest-rlock-hardirq.h | ||
locking-selftest-rlock-softirq.h | ||
locking-selftest-rlock.h | ||
locking-selftest-rsem.h | ||
locking-selftest-softirq.h | ||
locking-selftest-spin-hardirq.h | ||
locking-selftest-spin-softirq.h | ||
locking-selftest-spin.h | ||
locking-selftest-wlock-hardirq.h | ||
locking-selftest-wlock-softirq.h | ||
locking-selftest-wlock.h | ||
locking-selftest-wsem.h | ||
locking-selftest.c | ||
lru_cache.c | ||
Makefile | ||
md5.c | ||
memory-notifier-error-inject.c | ||
memweight.c | ||
nlattr.c | ||
notifier-error-inject.c | ||
notifier-error-inject.h | ||
of-reconfig-notifier-error-inject.c | ||
oid_registry.c | ||
parser.c | ||
pci_iomap.c | ||
percpu_counter.c | ||
plist.c | ||
pm-notifier-error-inject.c | ||
prio_heap.c | ||
proportions.c | ||
qmi_encdec.c | ||
qmi_encdec_priv.h | ||
radix-tree.c | ||
random32.c | ||
ratelimit.c | ||
rational.c | ||
rbtree.c | ||
rbtree_test.c | ||
reciprocal_div.c | ||
scatterlist.c | ||
sha1.c | ||
show_mem.c | ||
smp_processor_id.c | ||
sort.c | ||
stmp_device.c | ||
string.c | ||
string_helpers.c | ||
strncpy_from_user.c | ||
strnlen_user.c | ||
swiotlb.c | ||
syscall.c | ||
test-kstrtox.c | ||
test-string_helpers.c | ||
textsearch.c | ||
timerqueue.c | ||
ts_bm.c | ||
ts_fsm.c | ||
ts_kmp.c | ||
ucs2_string.c | ||
usercopy.c | ||
uuid.c | ||
vsprintf.c |