mirror of
https://github.com/team-infusion-developers/android_kernel_samsung_msm8976.git
synced 2024-09-21 03:43:03 +00:00
c182462032
We want to avoid lots of different copy_page implementations, settling
for something that is "good enough" everywhere and hopefully easy to
understand and maintain whilst we're at it.

This patch reworks our copy_page implementation based on discussions
with Cavium on the list and benchmarking on Cortex-A processors so that:

  - The loop is unrolled to copy 128 bytes per iteration

  - The reads are offset so that we read from the next 128-byte block
    in the same iteration that we store the previous block

  - Explicit prefetch instructions are removed for now, since they hurt
    performance on CPUs with hardware prefetching

  - The loop exit condition is calculated at the start of the loop

Change-Id: I0d9f3bbe4efa2751f41432a3b4b299fbb0e494be
Signed-off-by: Will Deacon <will.deacon@arm.com>
Tested-by: Andrew Pinski <apinski@cavium.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
77 lines
1.7 KiB
ArmAsm
77 lines
1.7 KiB
ArmAsm
/*
 * Copyright (C) 2012 ARM Ltd.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
|
|
|
|
#include <linux/linkage.h>
|
|
#include <linux/const.h>
|
|
#include <asm/assembler.h>
|
|
#include <asm/page.h>
|
|
|
|
/*
 * Helper for the copy loop: store the register pair ra:rb to dest + off
 * using a non-temporal store pair, then refill the same two registers
 * from src + off.  Expands to exactly one STNP followed by one LDP and
 * relies on x0 (dest cursor) and x1 (src cursor) being set up by the
 * caller of the macro.
 */
	.macro	store_then_reload ra, rb, off
	stnp	\ra, \rb, [x0, #\off]
	ldp	\ra, \rb, [x1, #\off]
	.endm

/*
 * copy_page - copy one page from src to dest
 *
 * Both addresses are page aligned.
 *
 * Parameters:
 *	x0 - dest
 *	x1 - src
 *
 * Register usage:
 *	x0	 - dest cursor, advanced by 128 per loop iteration
 *	x1	 - src cursor, always one 128-byte block ahead of x0
 *	x2 - x17 - the 128-byte block currently in flight
 *	x18	 - source bytes that have not been loaded yet
 *
 * Clobbers x0 - x18 and the condition flags; no stack is used.
 *
 * The copy is software pipelined: a block is loaded one step before it
 * is stored, so every loop iteration stores the previous block while
 * loading the next one.  The loop arithmetic assumes PAGE_SIZE is a
 * multiple of 128 and at least 256.
 *
 * NOTE(review): x18 is used as a plain scratch register here; confirm
 * that this kernel build does not reserve x18 for another purpose.
 */
ENTRY(copy_page)
	/* Prime the pipeline with the first 128 bytes of the source. */
	ldp	x2, x3, [x1]
	ldp	x4, x5, [x1, #16]
	ldp	x6, x7, [x1, #32]
	ldp	x8, x9, [x1, #48]
	ldp	x10, x11, [x1, #64]
	ldp	x12, x13, [x1, #80]
	ldp	x14, x15, [x1, #96]
	ldp	x16, x17, [x1, #112]

	/* One block is already in registers, so it is not counted here. */
	mov	x18, #(PAGE_SIZE - 128)
	add	x1, x1, #128

.Lcopy_next_block:
	/*
	 * Account for the block this iteration is about to load.  The
	 * flags set here are consumed by the b.gt at the bottom of the
	 * loop; none of the instructions in between modify them.
	 */
	subs	x18, x18, #128

	store_then_reload	x2, x3, 0
	store_then_reload	x4, x5, 16
	store_then_reload	x6, x7, 32
	store_then_reload	x8, x9, 48
	store_then_reload	x10, x11, 64
	store_then_reload	x12, x13, 80
	store_then_reload	x14, x15, 96
	store_then_reload	x16, x17, 112

	add	x0, x0, #128
	add	x1, x1, #128

	/* Keep going while there is still source data left to load. */
	b.gt	.Lcopy_next_block

	/* Drain the pipeline: write out the final block held in x2 - x17. */
	stnp	x2, x3, [x0]
	stnp	x4, x5, [x0, #16]
	stnp	x6, x7, [x0, #32]
	stnp	x8, x9, [x0, #48]
	stnp	x10, x11, [x0, #64]
	stnp	x12, x13, [x0, #80]
	stnp	x14, x15, [x0, #96]
	stnp	x16, x17, [x0, #112]

	ret
ENDPROC(copy_page)
|