edac: cortex_arm64: Poll to check for cache errors
By design, the Cortex-A53/A57 processors are incapable of generating interrupts or PMU events once a single-bit error is observed in the L2 caches. Hence, we need to poll the L2MERRSR register periodically to check for single-bit errors. We need to do this for the L2 cache on both clusters. Change-Id: I76a440b820f23c9667a5596cf550ff7725ec1cf5 Signed-off-by: Rohit Vaswani <rvaswani@codeaurora.org>
This commit is contained in:
parent
093d936988
commit
15c8581540
|
@ -22,6 +22,7 @@ Optional properties:
|
|||
- reg: Should contain physical address of the CCI register space
|
||||
- reg-names: Should contain 'cci'. Must be present if 'reg' property is present
|
||||
- qcom,apply-cti-pmu-wa: Indicates if the driver needs to apply the CTI PMU Workaround. Relevant for 8994V1.
|
||||
- poll-delay-ms: Indicates how often the edac check callback should be called. Time in msec. If this property is absent, the driver does not install the polling callback.
|
||||
|
||||
Example:
|
||||
cpu_cache_erp {
|
||||
|
|
|
@ -15,10 +15,10 @@
|
|||
|
||||
#ifdef CONFIG_EDAC_CORTEX_ARM64
|
||||
void arm64_erp_local_dbe_handler(void);
|
||||
void arm64_check_cache_ecc(void);
|
||||
void arm64_check_cache_ecc(void *info);
|
||||
#else
|
||||
static inline void arm64_erp_local_dbe_handler(void) { }
|
||||
static inline void arm64_check_cache_ecc(void) { }
|
||||
static inline void arm64_check_cache_ecc(void *info) { }
|
||||
#endif
|
||||
|
||||
static inline void atomic_scrub(void *addr, int size)
|
||||
|
|
|
@ -557,7 +557,7 @@ static void ipi_cpu_stop(unsigned int cpu, struct pt_regs *regs)
|
|||
pr_crit("CPU%u: stopping\n", cpu);
|
||||
show_regs(regs);
|
||||
dump_stack();
|
||||
arm64_check_cache_ecc();
|
||||
arm64_check_cache_ecc(NULL);
|
||||
raw_spin_unlock(&stop_lock);
|
||||
}
|
||||
|
||||
|
|
|
@ -786,7 +786,7 @@ static int msm_cti_pmu_wa_cpu_notify(struct notifier_block *self,
|
|||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
void arm64_check_cache_ecc(void)
|
||||
void arm64_check_cache_ecc(void *info)
|
||||
{
|
||||
if (panic_handler_drvdata)
|
||||
check_sbe_event(panic_handler_drvdata);
|
||||
|
@ -795,17 +795,36 @@ void arm64_check_cache_ecc(void)
|
|||
static int arm64_erp_panic_notify(struct notifier_block *this,
|
||||
unsigned long event, void *ptr)
|
||||
{
|
||||
arm64_check_cache_ecc();
|
||||
arm64_check_cache_ecc(NULL);
|
||||
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
static void arm64_monitor_cache_errors(struct edac_device_ctl_info *edev)
|
||||
{
|
||||
struct cpumask cluster_mask, old_mask;
|
||||
int cpu;
|
||||
|
||||
cpumask_clear(&cluster_mask);
|
||||
cpumask_clear(&old_mask);
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
cpumask_copy(&cluster_mask, topology_core_cpumask(cpu));
|
||||
if (cpumask_equal(&cluster_mask, &old_mask))
|
||||
continue;
|
||||
cpumask_copy(&old_mask, &cluster_mask);
|
||||
smp_call_function_any(&cluster_mask,
|
||||
arm64_check_cache_ecc, NULL, 0);
|
||||
}
|
||||
}
|
||||
|
||||
static int arm64_cpu_erp_probe(struct platform_device *pdev)
|
||||
{
|
||||
struct device *dev = &pdev->dev;
|
||||
struct erp_drvdata *drv;
|
||||
struct resource *r;
|
||||
int cpu;
|
||||
u32 poll_msec;
|
||||
struct erp_drvdata * __percpu *drv_cpu =
|
||||
alloc_percpu(struct erp_drvdata *);
|
||||
|
||||
|
@ -823,18 +842,24 @@ static int arm64_cpu_erp_probe(struct platform_device *pdev)
|
|||
if (!drv->edev_ctl)
|
||||
return -ENOMEM;
|
||||
|
||||
rc = of_property_read_u32(pdev->dev.of_node, "poll-delay-ms",
|
||||
&poll_msec);
|
||||
if (!rc) {
|
||||
drv->edev_ctl->edac_check = arm64_monitor_cache_errors;
|
||||
drv->edev_ctl->poll_msec = poll_msec;
|
||||
drv->edev_ctl->defer_work = 1;
|
||||
}
|
||||
drv->edev_ctl->dev = dev;
|
||||
drv->edev_ctl->mod_name = dev_name(dev);
|
||||
drv->edev_ctl->dev_name = dev_name(dev);
|
||||
drv->edev_ctl->ctl_name = "cache";
|
||||
drv->edev_ctl->panic_on_ce = panic_on_ce;
|
||||
drv->edev_ctl->panic_on_ue = ARM64_ERP_PANIC_ON_UE;
|
||||
|
||||
rc = edac_device_add_device(drv->edev_ctl);
|
||||
if (rc)
|
||||
goto out_mem;
|
||||
|
||||
drv->edev_ctl->panic_on_ce = panic_on_ce;
|
||||
drv->edev_ctl->panic_on_ue = ARM64_ERP_PANIC_ON_UE;
|
||||
|
||||
r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "cci");
|
||||
if (r)
|
||||
drv->cci_base = devm_ioremap_resource(dev, r);
|
||||
|
@ -921,6 +946,7 @@ out_irq:
|
|||
|
||||
abort_handler_drvdata = drv;
|
||||
panic_handler_drvdata = drv;
|
||||
|
||||
return 0;
|
||||
|
||||
out_dev:
|
||||
|
|
Loading…
Reference in New Issue