iommu: msm: flush page tables once at the end of map/unmap

Flushing the cache for each PTE update can be very time
consuming when the number of PTEs being updated is on the
order of a few thousand. Instead of performing cache ops
for each PTE update, flush the updated page tables once at
the end of the map/unmap routine. This saves roughly 60%
of the total time spent in map/unmap calls. A few numbers
with and without this optimization applied:

Numbers are taken on a single Cortex-A53 core clocked at
1.2 GHz.

AARCH64 (without optimization)
    size iommu_map_range  iommu_unmap
     64K           14 us         9 us
      2M          176 us        16 us
     12M         1016 us        54 us
     20M         1809 us       100 us

AARCH64 (with optimization)
    size iommu_map_range  iommu_unmap
     64K           18 us        12 us
      2M           77 us        18 us
     12M          396 us        47 us
     20M          648 us        73 us
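
As a minimal sketch of the pattern (the two helpers below are
simplified stand-ins for illustration, not the exact driver
code): previously every PTE store was followed by its own cache
flush; now all stores happen first and the affected range is
flushed once.

    /* Before: one cache maintenance op per PTE store. */
    static void map_ptes_per_pte_flush(u64 *pte, phys_addr_t pa, int npte)
    {
        int i;

        for (i = 0; i < npte; ++i, pa += SZ_4K) {
            pte[i] = pa | LL_TYPE_PAGE;
            dmac_flush_range(&pte[i], &pte[i] + 1);
        }
    }

    /* After: store all PTEs, then flush the updated range once. */
    static void map_ptes_single_flush(u64 *pte, phys_addr_t pa, int npte)
    {
        int i;

        for (i = 0; i < npte; ++i, pa += SZ_4K)
            pte[i] = pa | LL_TYPE_PAGE;
        dmac_flush_range(pte, pte + npte);
    }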

Change-Id: I5c5f9e5cec5a7aed5b478be52d943fcaa1c0ed84
Signed-off-by: Chintan Pandya <cpandya@codeaurora.org>
Chintan Pandya 2015-06-05 19:28:53 +05:30
parent 6d3d5c278e
commit 5ab7ef8486
5 changed files with 93 additions and 20 deletions


@@ -1081,6 +1081,7 @@ static int msm_iommu_map(struct iommu_domain *domain, unsigned long va,
if (ret)
goto fail;
msm_iommu_flush_pagetable(&priv->pt, va, len);
fail:
spin_unlock_irqrestore(&msm_iommu_spin_lock, flags);
return ret;
@@ -1102,6 +1103,7 @@ static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long va,
if (ret < 0)
goto fail;
msm_iommu_flush_pagetable(&priv->pt, va, len);
ret = __flush_iotlb(domain);
msm_iommu_pagetable_free_tables(&priv->pt, va, len);
@@ -1128,6 +1130,7 @@ static int msm_iommu_map_range(struct iommu_domain *domain, unsigned long va,
}
ret = msm_iommu_pagetable_map_range(&priv->pt, va, sg, len, prot);
msm_iommu_flush_pagetable(&priv->pt, va, len);
fail:
spin_unlock_irqrestore(&msm_iommu_spin_lock, flags);
@@ -1145,6 +1148,7 @@ static int msm_iommu_unmap_range(struct iommu_domain *domain, unsigned long va,
priv = domain->priv;
msm_iommu_pagetable_unmap_range(&priv->pt, va, len);
msm_iommu_flush_pagetable(&priv->pt, va, len);
__flush_iotlb(domain);
msm_iommu_pagetable_free_tables(&priv->pt, va, len);


@@ -612,6 +612,15 @@ int msm_iommu_pagetable_map(struct msm_iommu_pt *pt, unsigned long va,
return ret;
}
void msm_iommu_flush_pagetable(struct msm_iommu_pt *pt, unsigned long va,
size_t len)
{
/*
 * Consolidated flush of the page tables is not implemented for
 * v7S because this driver already combines the flushes for
 * last-level PTEs.
 */
}
phys_addr_t msm_iommu_iova_to_phys_soft(struct iommu_domain *domain,
dma_addr_t va)
{


@@ -30,4 +30,6 @@ phys_addr_t msm_iommu_iova_to_phys_soft(struct iommu_domain *domain,
dma_addr_t va);
void msm_iommu_pagetable_free_tables(struct msm_iommu_pt *pt, unsigned long va,
size_t len);
void msm_iommu_flush_pagetable(struct msm_iommu_pt *pt, unsigned long va,
size_t len);
#endif


@@ -22,6 +22,7 @@
#include "msm_iommu_pagetable.h"
#define NUM_PT_LEVEL 4
#define NUM_PTE 512 /* generic for all levels */
#define NUM_FL_PTE 512 /* First level */
#define NUM_SL_PTE 512 /* Second level */
#define NUM_TL_PTE 512 /* Third level */
@@ -100,19 +101,12 @@ static void __msm_iommu_pagetable_unmap_range(struct msm_iommu_pt *pt,
unsigned long va, size_t len,
u32 silent);
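/*
 * Write the given PTE range back to memory so the IOMMU's table
 * walker sees it; a no-op when the page tables are IO-coherent
 * (redirect set).
 */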
static inline void clean_pte(u64 *start, u64 *end, s32 redirect)
{
if (!redirect)
dmac_flush_range(start, end);
}
s32 msm_iommu_pagetable_alloc(struct msm_iommu_pt *pt)
{
pt->fl_table = (u64 *) get_zeroed_page(GFP_ATOMIC);
if (!pt->fl_table)
return -ENOMEM;
clean_pte(pt->fl_table, pt->fl_table + NUM_FL_PTE, pt->redirect);
return 0;
}
@@ -223,12 +217,10 @@ static u64 *make_next_level_table(s32 redirect, u64 *pte)
pr_err("Could not allocate next level table\n");
goto fail;
}
clean_pte(next_level_table, next_level_table + NUM_FL_PTE, redirect);
/* Leave APTable bits 0 to let next level decide access permissions */
*pte = (((phys_addr_t)__pa(next_level_table)) &
FLSL_BASE_MASK) | FLSL_TYPE_TABLE;
clean_pte(pte, pte + 1, redirect);
fail:
return next_level_table;
}
@@ -246,7 +238,6 @@ static inline s32 ll_4k_map(u64 *ll_pte, phys_addr_t pa,
}
*ll_pte = upper_attr | (pa & LL_PAGE_MASK) | lower_attr | LL_TYPE_PAGE;
clean_pte(ll_pte, ll_pte + 1, redirect);
fail:
return ret;
}
@@ -272,7 +263,6 @@ static inline s32 ll_64k_map(u64 *ll_pte, phys_addr_t pa,
for (i = 0; i < 16; ++i)
*(ll_pte+i) = upper_attr | (pa & LL_PAGE_MASK) |
lower_attr | LL_TYPE_PAGE;
clean_pte(ll_pte, ll_pte + 16, redirect);
fail:
return ret;
}
@@ -291,7 +281,6 @@ static inline s32 tl_2m_map(u64 *tl_pte, phys_addr_t pa,
*tl_pte = upper_attr | (pa & FLSL_BLOCK_MASK) |
lower_attr | FLSL_TYPE_BLOCK;
clean_pte(tl_pte, tl_pte + 1, redirect);
fail:
return ret;
}
@@ -317,7 +306,6 @@ static inline s32 tl_32m_map(u64 *tl_pte, phys_addr_t pa,
for (i = 0; i < 16; ++i)
*(tl_pte+i) = upper_attr | (pa & FLSL_BLOCK_MASK) |
lower_attr | FLSL_TYPE_BLOCK;
clean_pte(tl_pte, tl_pte + 16, redirect);
fail:
return ret;
}
@@ -337,7 +325,6 @@ static inline s32 sl_1G_map(u64 *sl_pte, phys_addr_t pa,
*sl_pte = upper_attr | (pa & FLSL_1G_BLOCK_MASK) |
lower_attr | FLSL_TYPE_BLOCK;
clean_pte(sl_pte, sl_pte + 1, redirect);
fail:
return ret;
}
@@ -653,7 +640,6 @@ static u64 clear_4th_level(u64 va, u64 *ll_pte, u64 len, u32 redirect,
}
num_pte = end_offset - start_offset;
clean_pte(ll_pte, ll_pte + num_pte, redirect);
chunk_size = SZ_4K * num_pte;
return chunk_size;
@@ -680,7 +666,6 @@ static u64 clear_3rd_level(u64 va, u64 *tl_pte, u64 len, u32 redirect,
BUG();
*tl_pte = 0;
clean_pte(tl_pte, tl_pte + 1, redirect);
return SZ_2M;
} else if (type == FLSL_TYPE_TABLE) {
ll_table = FOLLOW_TO_NEXT_TABLE(tl_pte);
@@ -694,7 +679,6 @@ static u64 clear_3rd_level(u64 va, u64 *tl_pte, u64 len, u32 redirect,
if (p) {
free_pagetable_level(p, 4, 0);
*tl_pte = 0;
clean_pte(tl_pte, tl_pte + 1, redirect);
}
}
} else {
@@ -726,7 +710,6 @@ static u64 clear_2nd_level(u64 va, u64 *sl_pte, u64 len, u32 redirect,
BUG();
*sl_pte = 0;
clean_pte(sl_pte, sl_pte + 1, redirect);
return SZ_1G;
} else if (type == FLSL_TYPE_TABLE) {
tl_table = FOLLOW_TO_NEXT_TABLE(sl_pte);
@@ -740,7 +723,6 @@ static u64 clear_2nd_level(u64 va, u64 *sl_pte, u64 len, u32 redirect,
if (p) {
free_pagetable_level(p, 3, 0);
*sl_pte = 0;
clean_pte(sl_pte, sl_pte + 1, redirect);
}
}
} else {
@@ -785,7 +767,6 @@ static u64 clear_1st_level(u64 va, u64 *fl_pte, u64 len, u32 redirect,
if (p) {
free_pagetable_level(p, 2, 0);
*fl_pte = 0;
clean_pte(fl_pte, fl_pte + 1, redirect);
}
}
} else {
@@ -832,6 +813,66 @@ static void __msm_iommu_pagetable_unmap_range(struct msm_iommu_pt *pt,
}
}
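/*
 * Recursively flush the page-table entries covering [va, va + len)
 * at the given level: next-level tables are walked first, then this
 * level's updated range is written back; the last level (4) is
 * flushed directly.
 */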
static void flush_pagetable_level(u64 base, int level, unsigned long va,
size_t len)
{
unsigned long i;
unsigned long start;
unsigned long len_offset;
unsigned long end = NUM_FL_PTE;
unsigned long level_granularity;
unsigned long va_left = va;
size_t len_left = len;
u64 *table = phys_to_virt(base);
if (level <= NUM_PT_LEVEL) {
switch (level) {
case 1:
start = FL_OFFSET(va);
level_granularity = 1ULL << FL_SHIFT;
len_offset = FL_OFFSET(len);
break;
case 2:
start = SL_OFFSET(va);
level_granularity = 1ULL << SL_SHIFT;
len_offset = SL_OFFSET(len);
break;
case 3:
start = TL_OFFSET(va);
level_granularity = 1ULL << TL_SHIFT;
len_offset = TL_OFFSET(len);
break;
case 4:
start = LL_OFFSET(va);
level_granularity = 1ULL << LL_SHIFT;
len_offset = LL_OFFSET(len);
goto flush_this_level;
default:
return;
}
}
if ((len / level_granularity) + start < NUM_PTE)
end = start + len_offset;
else
end = NUM_PTE;
for (i = start; i <= end; ++i) {
if ((table[i] & FLSL_TYPE_TABLE) == FLSL_TYPE_TABLE) {
u64 p = table[i] & FLSL_BASE_MASK;
if (p)
flush_pagetable_level(p, level + 1, va_left,
len_left);
}
va_left += level_granularity;
len_left -= level_granularity;
}
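/* Write back this level's updated entry range in one operation. */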
flush_this_level:
dmac_flush_range(table + start, table + end);
}
int msm_iommu_pagetable_map_range(struct msm_iommu_pt *pt, unsigned long va,
struct scatterlist *sg, size_t len, int prot)
{
@@ -862,6 +903,15 @@ size_t msm_iommu_pagetable_unmap(struct msm_iommu_pt *pt, unsigned long va,
return len;
}
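/*
 * Single consolidated flush of the updated tables; not needed when
 * the page tables are IO-coherent (redirect set).
 */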
void msm_iommu_flush_pagetable(struct msm_iommu_pt *pt, unsigned long va,
size_t len)
{
u64 *fl_table = pt->fl_table;
if (!pt->redirect)
flush_pagetable_level(virt_to_phys(fl_table), 1, va, len);
}
static phys_addr_t get_phys_from_va(unsigned long va, u64 *table, int level)
{
u64 type;


@@ -713,6 +713,14 @@ static void __msm_iommu_pagetable_unmap_range(struct msm_iommu_pt *pt,
}
}
void msm_iommu_flush_pagetable(struct msm_iommu_pt *pt, unsigned long va,
size_t len)
{
/*
 * Consolidated flush of the page tables is not yet implemented
 * for the LPAE driver.
 */
}
phys_addr_t msm_iommu_iova_to_phys_soft(struct iommu_domain *domain,
dma_addr_t va)
{