From 553575f1ae048aa44682b46b3c51929a0b3ad337 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 18 Nov 2011 03:32:01 -0500 Subject: [PATCH 01/15] tools turbostat: recognize and run properly on IVB Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 8b2d37b59c9..4b05b445969 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -809,6 +809,8 @@ int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model) case 0x2C: /* Westmere EP - Gulftown */ case 0x2A: /* SNB */ case 0x2D: /* SNB Xeon */ + case 0x3A: /* IVB */ + case 0x3D: /* IVB Xeon */ return 1; case 0x2E: /* Nehalem-EX Xeon - Beckton */ case 0x2F: /* Westmere-EX Xeon - Eagleton */ From 8df0eb7c9d96f9e82f233ee8b74e0f0c8471f868 Mon Sep 17 00:00:00 2001 From: Kurt Garloff Date: Tue, 17 Jan 2012 04:18:02 -0500 Subject: [PATCH 02/15] ACPI: Store SRAT table revision In SRAT v1, we had 8bit proximity domain (PXM) fields; SRAT v2 provides 32bits for these. The new fields were reserved before. According to the ACPI spec, the OS must disregrard reserved fields. In order to know whether or not, we must know what version the SRAT table has. This patch stores the SRAT table revision for later consumption by arch specific __init functions. Signed-off-by: Kurt Garloff Signed-off-by: Len Brown --- drivers/acpi/numa.c | 6 ++++++ include/acpi/acpi_numa.h | 1 + 2 files changed, 7 insertions(+) diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c index 3b5c3189fd9..e56f3be7b07 100644 --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa.c @@ -45,6 +45,8 @@ static int pxm_to_node_map[MAX_PXM_DOMAINS] static int node_to_pxm_map[MAX_NUMNODES] = { [0 ... MAX_NUMNODES - 1] = PXM_INVAL }; +unsigned char acpi_srat_revision __initdata; + int pxm_to_node(int pxm) { if (pxm < 0) @@ -255,9 +257,13 @@ acpi_parse_memory_affinity(struct acpi_subtable_header * header, static int __init acpi_parse_srat(struct acpi_table_header *table) { + struct acpi_table_srat *srat; if (!table) return -EINVAL; + srat = (struct acpi_table_srat *)table; + acpi_srat_revision = srat->header.revision; + /* Real work done in acpi_table_parse_srat below. */ return 0; diff --git a/include/acpi/acpi_numa.h b/include/acpi/acpi_numa.h index 17397267217..451823cb883 100644 --- a/include/acpi/acpi_numa.h +++ b/include/acpi/acpi_numa.h @@ -15,6 +15,7 @@ extern int pxm_to_node(int); extern int node_to_pxm(int); extern void __acpi_map_pxm_to_node(int, int); extern int acpi_map_pxm_to_node(int); +extern unsigned char acpi_srat_revision; #endif /* CONFIG_ACPI_NUMA */ #endif /* __ACP_NUMA_H */ From cd298f60a2451a16e0f077404bf69b62ec868733 Mon Sep 17 00:00:00 2001 From: Kurt Garloff Date: Tue, 17 Jan 2012 04:20:31 -0500 Subject: [PATCH 03/15] ACPI, x86: Use SRAT table rev to use 8bit or 32bit PXM fields (x86/x86-64) In SRAT v1, we had 8bit proximity domain (PXM) fields; SRAT v2 provides 32bits for these. The new fields were reserved before. According to the ACPI spec, the OS must disregrard reserved fields. x86/x86-64 was rather inconsistent prior to this patch; it used 8 bits for the pxm field in cpu_affinity, but 32 bits in mem_affinity. This patch makes it consistent: Either use 8 bits consistently (SRAT rev 1 or lower) or 32 bits (SRAT rev 2 or higher). cc: x86@kernel.org Signed-off-by: Kurt Garloff Signed-off-by: Len Brown --- arch/x86/mm/srat.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index 81dbfdeb080..7efd0c615d5 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c @@ -104,6 +104,8 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0) return; pxm = pa->proximity_domain_lo; + if (acpi_srat_revision >= 2) + pxm |= *((unsigned int*)pa->proximity_domain_hi) << 8; node = setup_node(pxm); if (node < 0) { printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm); @@ -155,6 +157,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) start = ma->base_address; end = start + ma->length; pxm = ma->proximity_domain; + if (acpi_srat_revision <= 1) + pxm &= 0xff; node = setup_node(pxm); if (node < 0) { printk(KERN_ERR "SRAT: Too many proximity domains.\n"); From 9f10f6a520deb3639fac78d81151a3ade88b4e7f Mon Sep 17 00:00:00 2001 From: Kurt Garloff Date: Tue, 17 Jan 2012 04:21:49 -0500 Subject: [PATCH 04/15] ACPI, ia64: Use SRAT table rev to use 8bit or 16/32bit PXM fields (ia64) In SRAT v1, we had 8bit proximity domain (PXM) fields; SRAT v2 provides 32bits for these. The new fields were reserved before. According to the ACPI spec, the OS must disregrard reserved fields. ia64 did handle the PXM fields almost consistently, but depending on sgi's sn2 platform. This patch leaves the sn2 logic in, but does also use 16/32 bits for PXM if the SRAT has rev 2 or higher. The patch also adds __init to the two pxm accessor functions, as they access __initdata now and are called from an __init function only anyway. Note that the code only uses 16 bits for the PXM field in the processor proximity field; the patch does not address this as 16 bits are more than enough. Signed-off-by: Kurt Garloff Signed-off-by: Len Brown --- arch/ia64/kernel/acpi.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index bfb4d01e0e5..5207035dc06 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -429,22 +429,24 @@ static u32 __devinitdata pxm_flag[PXM_FLAG_LEN]; static struct acpi_table_slit __initdata *slit_table; cpumask_t early_cpu_possible_map = CPU_MASK_NONE; -static int get_processor_proximity_domain(struct acpi_srat_cpu_affinity *pa) +static int __init +get_processor_proximity_domain(struct acpi_srat_cpu_affinity *pa) { int pxm; pxm = pa->proximity_domain_lo; - if (ia64_platform_is("sn2")) + if (ia64_platform_is("sn2") || acpi_srat_revision >= 2) pxm += pa->proximity_domain_hi[0] << 8; return pxm; } -static int get_memory_proximity_domain(struct acpi_srat_mem_affinity *ma) +static int __init +get_memory_proximity_domain(struct acpi_srat_mem_affinity *ma) { int pxm; pxm = ma->proximity_domain; - if (!ia64_platform_is("sn2")) + if (!ia64_platform_is("sn2") && acpi_srat_revision <= 1) pxm &= 0xff; return pxm; From 39a74fdedd1c1461d6fb6d330b5266886513c98f Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 10 Jan 2012 15:48:19 -0800 Subject: [PATCH 05/15] intel_idle: fix API misuse smp_call_function() only lets all other CPUs execute a specific function, while we expect all CPUs do in intel_idle. Without the fix, we could have one cpu which has auto_demotion enabled or has no broadcast timer setup. Usually we don't see impact because auto demotion just harms power and the intel_idle init is called in CPU 0, where boradcast timer delivers interrupt, but this still could be a problem. Cc: stable@vger.kernel.org Signed-off-by: Shaohua Li Signed-off-by: Andrew Morton Signed-off-by: Len Brown --- drivers/idle/intel_idle.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 5d2f8e13cf0..af0b4a5d52f 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -394,7 +394,7 @@ static int intel_idle_probe(void) if (boot_cpu_has(X86_FEATURE_ARAT)) /* Always Reliable APIC Timer */ lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE; else { - smp_call_function(__setup_broadcast_timer, (void *)true, 1); + on_each_cpu(__setup_broadcast_timer, (void *)true, 1); register_cpu_notifier(&setup_broadcast_notifier); } @@ -471,7 +471,7 @@ static int intel_idle_cpuidle_driver_init(void) } if (auto_demotion_disable_flags) - smp_call_function(auto_demotion_disable, NULL, 1); + on_each_cpu(auto_demotion_disable, NULL, 1); return 0; } @@ -568,7 +568,7 @@ static void __exit intel_idle_exit(void) cpuidle_unregister_driver(&intel_idle_driver); if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE) { - smp_call_function(__setup_broadcast_timer, (void *)false, 1); + on_each_cpu(__setup_broadcast_timer, (void *)false, 1); unregister_cpu_notifier(&setup_broadcast_notifier); } From d640113fe80e45ebd4a5b420b220d3f6bf37f682 Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Tue, 13 Dec 2011 09:36:03 +0800 Subject: [PATCH 06/15] ACPI: processor: fix acpi_get_cpuid for UP processor For UP processor, it is likely that no _MAT method or MADT table defined. So currently acpi_get_cpuid(...) always return -1 for UP processor. This is wrong. It should return valid value for CPU0. In the other hand, BIOS may define multiple CPU handles even for UP processor, for example Scope (_PR) { Processor (CPU0, 0x00, 0x00000410, 0x06) {} Processor (CPU1, 0x01, 0x00000410, 0x06) {} Processor (CPU2, 0x02, 0x00000410, 0x06) {} Processor (CPU3, 0x03, 0x00000410, 0x06) {} } We should only return valid value for CPU0's acpi handle. And return invalid value for others. http://marc.info/?t=132329819900003&r=1&w=2 Cc: stable@vger.kernel.org Reported-and-tested-by: wallak@free.fr Signed-off-by: Lin Ming Signed-off-by: Len Brown --- drivers/acpi/processor_core.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index 3a0428e8435..c850de4c9a1 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -173,8 +173,30 @@ int acpi_get_cpuid(acpi_handle handle, int type, u32 acpi_id) apic_id = map_mat_entry(handle, type, acpi_id); if (apic_id == -1) apic_id = map_madt_entry(type, acpi_id); - if (apic_id == -1) - return apic_id; + if (apic_id == -1) { + /* + * On UP processor, there is no _MAT or MADT table. + * So above apic_id is always set to -1. + * + * BIOS may define multiple CPU handles even for UP processor. + * For example, + * + * Scope (_PR) + * { + * Processor (CPU0, 0x00, 0x00000410, 0x06) {} + * Processor (CPU1, 0x01, 0x00000410, 0x06) {} + * Processor (CPU2, 0x02, 0x00000410, 0x06) {} + * Processor (CPU3, 0x03, 0x00000410, 0x06) {} + * } + * + * Ignores apic_id and always return 0 for CPU0's handle. + * Return -1 for other CPU's handle. + */ + if (acpi_id == 0) + return acpi_id; + else + return apic_id; + } #ifdef CONFIG_SMP for_each_possible_cpu(i) { From 3333ea7804201918aa241641cf8493a62b8ad527 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Thu, 17 Nov 2011 23:37:00 +0100 Subject: [PATCH 07/15] ACPI processor: Fix error path, also remove sysdev link Signed-off-by: Thomas Renninger Signed-off-by: Len Brown --- drivers/acpi/processor_driver.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c index 9d7bc9f6b6c..90719d125e7 100644 --- a/drivers/acpi/processor_driver.c +++ b/drivers/acpi/processor_driver.c @@ -539,6 +539,7 @@ err_thermal_unregister: thermal_cooling_device_unregister(pr->cdev); err_power_exit: acpi_processor_power_exit(pr, device); + sysfs_remove_link(&device->dev.kobj, "sysdev"); err_free_cpumask: free_cpumask_var(pr->throttling.shared_cpu_map); From 63ff07beaebef2a82da41cf04053ae88b579226d Mon Sep 17 00:00:00 2001 From: Yanmin Zhang Date: Tue, 10 Jan 2012 15:48:21 -0800 Subject: [PATCH 08/15] intel_idle: remove redundant local_irq_disable() call irq disabling happens earlier in process_32.c:cpu_idle. Basically, cpuidle_state->enter is called, cpu irq is disabled. cpuidle_state->enter would turn on irq when exiting. intel_idle doesn't follow this assumption. Although it doesn't cause real issue, it misleads developers. Remove the call to local_irq_disable() at entry. [akpm@linux-foundation.org: add comment] Signed-off-by: Mingming Zhang Signed-off-by: Andrew Morton Signed-off-by: Len Brown --- drivers/idle/intel_idle.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index af0b4a5d52f..920c61c730a 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -232,6 +232,7 @@ static int get_driver_data(int cstate) * @drv: cpuidle driver * @index: index of cpuidle state * + * Must be called under local_irq_disable(). */ static int intel_idle(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) @@ -247,8 +248,6 @@ static int intel_idle(struct cpuidle_device *dev, cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) + 1; - local_irq_disable(); - /* * leave_mm() to avoid costly and often unnecessary wakeups * for flushing the user TLB's associated with the active mm. From 2e92c7ad8f269c2b5b7f2a4763675f55f00b75f5 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Thu, 15 Dec 2011 01:18:52 +0900 Subject: [PATCH 09/15] ACPI: kernel-parameters.txt : Add intel_idle.max_cstate Add missing intel_idle.max_cstate in kernel-parameters.txt Signed-off-by Masanari Iida Signed-off-by: Len Brown --- Documentation/kernel-parameters.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 81c287fad79..c6a56d8b901 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1035,6 +1035,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. By default, super page will be supported if Intel IOMMU has the capability. With this option, super page will not be supported. + + intel_idle.max_cstate= [KNL,HW,ACPI,X86] + 0 disables intel_idle and fall back on acpi_idle. + 1 to 6 specify maximum depth of C-state. + intremap= [X86-64, Intel-IOMMU] on enable Interrupt Remapping (default) off disable Interrupt Remapping From 95e3ec11491d0cbce9fcdf1cc17a527c114c7dcf Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 15 Dec 2011 13:03:14 +0000 Subject: [PATCH 10/15] intel_idle: Fix a cast to pointer from integer of different size warning in intel_idle Fix the following warning: drivers/idle/intel_idle.c: In function 'intel_idle_cpuidle_devices_init': drivers/idle/intel_idle.c:518:5: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] By making get_driver_data() return a long instead of an int. Signed-off-by: David Howells Signed-off-by: Len Brown --- drivers/idle/intel_idle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 920c61c730a..18f9ad81410 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -197,7 +197,7 @@ static struct cpuidle_state atom_cstates[MWAIT_MAX_NUM_CSTATES] = { .enter = &intel_idle }, }; -static int get_driver_data(int cstate) +static long get_driver_data(int cstate) { int driver_data; switch (cstate) { From 5c2a9f06a9cd7194f884cdc88144866235dec07d Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Sun, 4 Dec 2011 22:17:29 +0100 Subject: [PATCH 11/15] intel idle: Make idle driver more robust kvm -cpu host passes the original cpuid info to the guest. Latest kvm version seem to return true for mwait_leaf cpuid function on recent Intel CPUs. But it does not return mwait C-states (mwait_substates), instead zero is returned. While real CPUs seem to always return non-zero values, the intel idle driver should not get active in kvm (mwait_substates == 0) case and bail out. Otherwise a Null pointer exception will happen later when the cpuidle subsystem tries to get active: [0.984807] BUG: unable to handle kernel NULL pointer dereference at (null) [0.984807] IP: [<(null)>] (null) ... [0.984807][] ? cpuidle_idle_call+0xb4/0x340 [0.984807][] ? __atomic_notifier_call_chain+0x4c/0x70 [0.984807][] ? cpu_idle+0x78/0xd0 Reference: https://bugzilla.novell.com/show_bug.cgi?id=726296 Cc: stable@vger.kernel.org Signed-off-by: Thomas Renninger CC: Bruno Friedmann Signed-off-by: Len Brown --- drivers/idle/intel_idle.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 18f9ad81410..91aff180ac4 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -347,7 +347,8 @@ static int intel_idle_probe(void) cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates); if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || - !(ecx & CPUID5_ECX_INTERRUPT_BREAK)) + !(ecx & CPUID5_ECX_INTERRUPT_BREAK) || + !mwait_substates) return -ENODEV; pr_debug(PREFIX "MWAIT substates: 0x%x\n", mwait_substates); From 5e7590d40dc59d5c1889d9e70c9da1f1df3918c6 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Tue, 17 Jan 2012 17:35:22 +0100 Subject: [PATCH 12/15] ACPI processor: Remove unneeded cpuidle_unregister_driver call Since commit 46bcfad7a819bd17ac4e831b04405152d59784ab registering and unregistering cpuidle is done in processor_idle.c. Unregistering via: acpi_bus_unregister_driver(&acpi_processor_driver) -> acpi_processor_remove() -> acpi_processor_power_exit() Remove not needed cpuidle_unregister_driver() call from acpi_processor_exit Signed-off-by: Thomas Renninger CC: Deepthi Dharwar Signed-off-by: Len Brown --- drivers/acpi/processor_driver.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c index 90719d125e7..3616ce5509c 100644 --- a/drivers/acpi/processor_driver.c +++ b/drivers/acpi/processor_driver.c @@ -828,8 +828,6 @@ static void __exit acpi_processor_exit(void) acpi_bus_unregister_driver(&acpi_processor_driver); - cpuidle_unregister_driver(&acpi_idle_driver); - return; } From 3bd81a8710710f8bf5d1a5ebac315c842c20bdd3 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Tue, 17 Jan 2012 22:40:07 +0100 Subject: [PATCH 13/15] ACPI processor: Remove unneeded variable passed by acpi_processor_hotadd_init V2 V2: Fix typo: pr->handle -> pr, here: acpi_processor_hotadd_init(pr) This is a very small part taken from patches which afaik are coming from Yunhong Jiang (for a Xen not a Linus repo?). Cleanup only: no functional change. Advantage (beside cleanup) is that other data of the pr (acpi_processor) struct in the acpi_processor_hotadd_init() is needed later, for example a newly introduced flag: pr->flags.need_hotplug_init Signed-off-by: Thomas Renninger CC: Bjorn Helgaas CC: Jiang, Yunhong Signed-off-by: Len Brown --- drivers/acpi/processor_driver.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c index 3616ce5509c..b9cbd9b0945 100644 --- a/drivers/acpi/processor_driver.c +++ b/drivers/acpi/processor_driver.c @@ -82,7 +82,7 @@ MODULE_LICENSE("GPL"); static int acpi_processor_add(struct acpi_device *device); static int acpi_processor_remove(struct acpi_device *device, int type); static void acpi_processor_notify(struct acpi_device *device, u32 event); -static acpi_status acpi_processor_hotadd_init(acpi_handle handle, int *p_cpu); +static acpi_status acpi_processor_hotadd_init(struct acpi_processor *pr); static int acpi_processor_handle_eject(struct acpi_processor *pr); @@ -324,10 +324,8 @@ static int acpi_processor_get_info(struct acpi_device *device) * they are physically not present. */ if (pr->id == -1) { - if (ACPI_FAILURE - (acpi_processor_hotadd_init(pr->handle, &pr->id))) { + if (ACPI_FAILURE(acpi_processor_hotadd_init(pr))) return -ENODEV; - } } /* * On some boxes several processors use the same processor bus id. @@ -721,18 +719,19 @@ processor_walk_namespace_cb(acpi_handle handle, return (AE_OK); } -static acpi_status acpi_processor_hotadd_init(acpi_handle handle, int *p_cpu) +static acpi_status acpi_processor_hotadd_init(struct acpi_processor *pr) { + acpi_handle handle = pr->handle; if (!is_processor_present(handle)) { return AE_ERROR; } - if (acpi_map_lsapic(handle, p_cpu)) + if (acpi_map_lsapic(handle, &pr->id)) return AE_ERROR; - if (arch_register_cpu(*p_cpu)) { - acpi_unmap_lsapic(*p_cpu); + if (arch_register_cpu(pr->id)) { + acpi_unmap_lsapic(pr->id); return AE_ERROR; } @@ -749,7 +748,7 @@ static int acpi_processor_handle_eject(struct acpi_processor *pr) return (0); } #else -static acpi_status acpi_processor_hotadd_init(acpi_handle handle, int *p_cpu) +static acpi_status acpi_processor_hotadd_init(struct acpi_processor *pr) { return AE_ERROR; } From 65b7f839ceecc0a36c7969c0c9151d5748cd4242 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Tue, 17 Jan 2012 22:40:08 +0100 Subject: [PATCH 14/15] intel_idle: Split up and provide per CPU initialization func Function split up, should have no functional change. Provides entry point for physically hotplugged CPUs to initialize and activate cpuidle. Signed-off-by: Thomas Renninger CC: Deepthi Dharwar CC: Shaohua Li CC: Andrew Morton Signed-off-by: Len Brown --- drivers/idle/intel_idle.c | 82 ++++++++++++++++++++------------------- include/linux/cpuidle.h | 7 ++++ 2 files changed, 49 insertions(+), 40 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 5d2f8e13cf0..ef0c04d8dc2 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -478,64 +478,60 @@ static int intel_idle_cpuidle_driver_init(void) /* - * intel_idle_cpuidle_devices_init() + * intel_idle_cpu_init() * allocate, initialize, register cpuidle_devices + * @cpu: cpu/core to initialize */ -static int intel_idle_cpuidle_devices_init(void) +int intel_idle_cpu_init(int cpu) { - int i, cstate; + int cstate; struct cpuidle_device *dev; - intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device); - if (intel_idle_cpuidle_devices == NULL) - return -ENOMEM; + dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu); - for_each_online_cpu(i) { - dev = per_cpu_ptr(intel_idle_cpuidle_devices, i); + dev->state_count = 1; - dev->state_count = 1; + for (cstate = 1; cstate < MWAIT_MAX_NUM_CSTATES; ++cstate) { + int num_substates; - for (cstate = 1; cstate < MWAIT_MAX_NUM_CSTATES; ++cstate) { - int num_substates; + if (cstate > max_cstate) { + printk(PREFIX "max_cstate %d reached\n", + max_cstate); + break; + } - if (cstate > max_cstate) { - printk(PREFIX "max_cstate %d reached\n", - max_cstate); - break; - } + /* does the state exist in CPUID.MWAIT? */ + num_substates = (mwait_substates >> ((cstate) * 4)) + & MWAIT_SUBSTATE_MASK; + if (num_substates == 0) + continue; + /* is the state not enabled? */ + if (cpuidle_state_table[cstate].enter == NULL) + continue; - /* does the state exist in CPUID.MWAIT? */ - num_substates = (mwait_substates >> ((cstate) * 4)) - & MWAIT_SUBSTATE_MASK; - if (num_substates == 0) - continue; - /* is the state not enabled? */ - if (cpuidle_state_table[cstate].enter == NULL) { - continue; - } - - dev->states_usage[dev->state_count].driver_data = - (void *)get_driver_data(cstate); + dev->states_usage[dev->state_count].driver_data = + (void *)get_driver_data(cstate); dev->state_count += 1; } + dev->cpu = cpu; - dev->cpu = i; - if (cpuidle_register_device(dev)) { - pr_debug(PREFIX "cpuidle_register_device %d failed!\n", - i); - intel_idle_cpuidle_devices_uninit(); - return -EIO; - } + if (cpuidle_register_device(dev)) { + pr_debug(PREFIX "cpuidle_register_device %d failed!\n", cpu); + intel_idle_cpuidle_devices_uninit(); + return -EIO; } + if (auto_demotion_disable_flags) + smp_call_function_single(cpu, auto_demotion_disable, NULL, 1); + return 0; } static int __init intel_idle_init(void) { - int retval; + int retval, i; /* Do not load intel_idle at all for now if idle= is passed */ if (boot_option_idle_override != IDLE_NO_OVERRIDE) @@ -553,10 +549,16 @@ static int __init intel_idle_init(void) return retval; } - retval = intel_idle_cpuidle_devices_init(); - if (retval) { - cpuidle_unregister_driver(&intel_idle_driver); - return retval; + intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device); + if (intel_idle_cpuidle_devices == NULL) + return -ENOMEM; + + for_each_online_cpu(i) { + retval = intel_idle_cpu_init(i); + if (retval) { + cpuidle_unregister_driver(&intel_idle_driver); + return retval; + } } return 0; diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 7408af843b8..93df66ea794 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -188,7 +188,14 @@ struct cpuidle_governor { extern int cpuidle_register_governor(struct cpuidle_governor *gov); extern void cpuidle_unregister_governor(struct cpuidle_governor *gov); +#ifdef CONFIG_INTEL_IDLE +extern int intel_idle_cpu_init(int cpu); #else +static inline int intel_idle_cpu_init(int cpu) { return -1; } +#endif + +#else +static inline int intel_idle_cpu_init(int cpu) { return -1; } static inline int cpuidle_register_governor(struct cpuidle_governor *gov) {return 0;} From c130bd6f82e5dda28b1a19741c4c2fe269713199 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 17 Jan 2012 12:10:16 -0800 Subject: [PATCH 15/15] acpi/apei/einj: Add extensions to EINJ from rev 5.0 of acpi spec ACPI 5.0 provides extensions to the EINJ mechanism to specify the target for the error injection - by APICID for cpu related errors, by address for memory related errors, and by segment/bus/device/function for PCIe related errors. Also extensions for vendor specific error injections. Tested-by: Chen Gong Signed-off-by: Tony Luck Signed-off-by: Len Brown --- Documentation/acpi/apei/einj.txt | 55 ++++++-- drivers/acpi/apei/einj.c | 224 ++++++++++++++++++++++++++----- include/acpi/actbl1.h | 3 +- 3 files changed, 234 insertions(+), 48 deletions(-) diff --git a/Documentation/acpi/apei/einj.txt b/Documentation/acpi/apei/einj.txt index 5cc699ba545..e7cc3639721 100644 --- a/Documentation/acpi/apei/einj.txt +++ b/Documentation/acpi/apei/einj.txt @@ -47,20 +47,53 @@ directory apei/einj. The following files are provided. - param1 This file is used to set the first error parameter value. Effect of - parameter depends on error_type specified. For memory error, this is - physical memory address. Only available if param_extension module - parameter is specified. + parameter depends on error_type specified. - param2 This file is used to set the second error parameter value. Effect of - parameter depends on error_type specified. For memory error, this is - physical memory address mask. Only available if param_extension - module parameter is specified. + parameter depends on error_type specified. + +BIOS versions based in the ACPI 4.0 specification have limited options +to control where the errors are injected. Your BIOS may support an +extension (enabled with the param_extension=1 module parameter, or +boot command line einj.param_extension=1). This allows the address +and mask for memory injections to be specified by the param1 and +param2 files in apei/einj. + +BIOS versions using the ACPI 5.0 specification have more control over +the target of the injection. For processor related errors (type 0x1, +0x2 and 0x4) the APICID of the target should be provided using the +param1 file in apei/einj. For memory errors (type 0x8, 0x10 and 0x20) +the address is set using param1 with a mask in param2 (0x0 is equivalent +to all ones). For PCI express errors (type 0x40, 0x80 and 0x100) the +segment, bus, device and function are specified using param1: + + 31 24 23 16 15 11 10 8 7 0 + +-------------------------------------------------+ + | segment | bus | device | function | reserved | + +-------------------------------------------------+ + +An ACPI 5.0 BIOS may also allow vendor specific errors to be injected. +In this case a file named vendor will contain identifying information +from the BIOS that hopefully will allow an application wishing to use +the vendor specific extension to tell that they are running on a BIOS +that supports it. All vendor extensions have the 0x80000000 bit set in +error_type. A file vendor_flags controls the interpretation of param1 +and param2 (1 = PROCESSOR, 2 = MEMORY, 4 = PCI). See your BIOS vendor +documentation for details (and expect changes to this API if vendors +creativity in using this feature expands beyond our expectations). + +Example: +# cd /sys/kernel/debug/apei/einj +# cat available_error_type # See which errors can be injected +0x00000002 Processor Uncorrectable non-fatal +0x00000008 Memory Correctable +0x00000010 Memory Uncorrectable non-fatal +# echo 0x12345000 > param1 # Set memory address for injection +# echo 0xfffffffffffff000 > param2 # Mask - anywhere in this page +# echo 0x8 > error_type # Choose correctable memory error +# echo 1 > error_inject # Inject now -Injecting parameter support is a BIOS version specific extension, that -is, it only works on some BIOS version. If you want to use it, please -make sure your BIOS version has the proper support and specify -"param_extension=y" in module parameter. For more information about EINJ, please refer to ACPI specification -version 4.0, section 17.5. +version 4.0, section 17.5 and ACPI 5.0, section 18.6. diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c index 589b96c3870..31546fd2102 100644 --- a/drivers/acpi/apei/einj.c +++ b/drivers/acpi/apei/einj.c @@ -42,6 +42,42 @@ /* Firmware should respond within 1 milliseconds */ #define FIRMWARE_TIMEOUT (1 * NSEC_PER_MSEC) +/* + * ACPI version 5 provides a SET_ERROR_TYPE_WITH_ADDRESS action. + */ +static int acpi5; + +struct set_error_type_with_address { + u32 type; + u32 vendor_extension; + u32 flags; + u32 apicid; + u64 memory_address; + u64 memory_address_range; + u32 pcie_sbdf; +}; +enum { + SETWA_FLAGS_APICID = 1, + SETWA_FLAGS_MEM = 2, + SETWA_FLAGS_PCIE_SBDF = 4, +}; + +/* + * Vendor extensions for platform specific operations + */ +struct vendor_error_type_extension { + u32 length; + u32 pcie_sbdf; + u16 vendor_id; + u16 device_id; + u8 rev_id; + u8 reserved[3]; +}; + +static u32 vendor_flags; +static struct debugfs_blob_wrapper vendor_blob; +static char vendor_dev[64]; + /* * Some BIOSes allow parameters to the SET_ERROR_TYPE entries in the * EINJ table through an unpublished extension. Use with caution as @@ -103,7 +139,14 @@ static struct apei_exec_ins_type einj_ins_type[] = { */ static DEFINE_MUTEX(einj_mutex); -static struct einj_parameter *einj_param; +static void *einj_param; + +#ifndef readq +static inline __u64 readq(volatile void __iomem *addr) +{ + return ((__u64)readl(addr+4) << 32) + readl(addr); +} +#endif #ifndef writeq static inline void writeq(__u64 val, volatile void __iomem *addr) @@ -158,10 +201,31 @@ static int einj_timedout(u64 *t) return 0; } -static u64 einj_get_parameter_address(void) +static void check_vendor_extension(u64 paddr, + struct set_error_type_with_address *v5param) +{ + int offset = readl(&v5param->vendor_extension); + struct vendor_error_type_extension *v; + u32 sbdf; + + if (!offset) + return; + v = ioremap(paddr + offset, sizeof(*v)); + if (!v) + return; + sbdf = readl(&v->pcie_sbdf); + sprintf(vendor_dev, "%x:%x:%x.%x vendor_id=%x device_id=%x rev_id=%x\n", + sbdf >> 24, (sbdf >> 16) & 0xff, + (sbdf >> 11) & 0x1f, (sbdf >> 8) & 0x7, + readw(&v->vendor_id), readw(&v->device_id), + readb(&v->rev_id)); + iounmap(v); +} + +static void *einj_get_parameter_address(void) { int i; - u64 paddr = 0; + u64 paddrv4 = 0, paddrv5 = 0; struct acpi_whea_header *entry; entry = EINJ_TAB_ENTRY(einj_tab); @@ -170,12 +234,40 @@ static u64 einj_get_parameter_address(void) entry->instruction == ACPI_EINJ_WRITE_REGISTER && entry->register_region.space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) - memcpy(&paddr, &entry->register_region.address, - sizeof(paddr)); + memcpy(&paddrv4, &entry->register_region.address, + sizeof(paddrv4)); + if (entry->action == ACPI_EINJ_SET_ERROR_TYPE_WITH_ADDRESS && + entry->instruction == ACPI_EINJ_WRITE_REGISTER && + entry->register_region.space_id == + ACPI_ADR_SPACE_SYSTEM_MEMORY) + memcpy(&paddrv5, &entry->register_region.address, + sizeof(paddrv5)); entry++; } + if (paddrv5) { + struct set_error_type_with_address *v5param; - return paddr; + v5param = ioremap(paddrv5, sizeof(*v5param)); + if (v5param) { + acpi5 = 1; + check_vendor_extension(paddrv5, v5param); + return v5param; + } + } + if (paddrv4) { + struct einj_parameter *v4param; + + v4param = ioremap(paddrv4, sizeof(*v4param)); + if (!v4param) + return 0; + if (readq(&v4param->reserved1) || readq(&v4param->reserved2)) { + iounmap(v4param); + return 0; + } + return v4param; + } + + return 0; } /* do sanity check to trigger table */ @@ -293,12 +385,56 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2) if (rc) return rc; apei_exec_ctx_set_input(&ctx, type); - rc = apei_exec_run(&ctx, ACPI_EINJ_SET_ERROR_TYPE); - if (rc) - return rc; - if (einj_param) { - writeq(param1, &einj_param->param1); - writeq(param2, &einj_param->param2); + if (acpi5) { + struct set_error_type_with_address *v5param = einj_param; + + writel(type, &v5param->type); + if (type & 0x80000000) { + switch (vendor_flags) { + case SETWA_FLAGS_APICID: + writel(param1, &v5param->apicid); + break; + case SETWA_FLAGS_MEM: + writeq(param1, &v5param->memory_address); + writeq(param2, &v5param->memory_address_range); + break; + case SETWA_FLAGS_PCIE_SBDF: + writel(param1, &v5param->pcie_sbdf); + break; + } + writel(vendor_flags, &v5param->flags); + } else { + switch (type) { + case ACPI_EINJ_PROCESSOR_CORRECTABLE: + case ACPI_EINJ_PROCESSOR_UNCORRECTABLE: + case ACPI_EINJ_PROCESSOR_FATAL: + writel(param1, &v5param->apicid); + writel(SETWA_FLAGS_APICID, &v5param->flags); + break; + case ACPI_EINJ_MEMORY_CORRECTABLE: + case ACPI_EINJ_MEMORY_UNCORRECTABLE: + case ACPI_EINJ_MEMORY_FATAL: + writeq(param1, &v5param->memory_address); + writeq(param2, &v5param->memory_address_range); + writel(SETWA_FLAGS_MEM, &v5param->flags); + break; + case ACPI_EINJ_PCIX_CORRECTABLE: + case ACPI_EINJ_PCIX_UNCORRECTABLE: + case ACPI_EINJ_PCIX_FATAL: + writel(param1, &v5param->pcie_sbdf); + writel(SETWA_FLAGS_PCIE_SBDF, &v5param->flags); + break; + } + } + } else { + rc = apei_exec_run(&ctx, ACPI_EINJ_SET_ERROR_TYPE); + if (rc) + return rc; + if (einj_param) { + struct einj_parameter *v4param = einj_param; + writeq(param1, &v4param->param1); + writeq(param2, &v4param->param2); + } } rc = apei_exec_run(&ctx, ACPI_EINJ_EXECUTE_OPERATION); if (rc) @@ -408,15 +544,25 @@ static int error_type_set(void *data, u64 val) { int rc; u32 available_error_type = 0; + u32 tval, vendor; + + /* + * Vendor defined types have 0x80000000 bit set, and + * are not enumerated by ACPI_EINJ_GET_ERROR_TYPE + */ + vendor = val & 0x80000000; + tval = val & 0x7fffffff; /* Only one error type can be specified */ - if (val & (val - 1)) - return -EINVAL; - rc = einj_get_available_error_type(&available_error_type); - if (rc) - return rc; - if (!(val & available_error_type)) + if (tval & (tval - 1)) return -EINVAL; + if (!vendor) { + rc = einj_get_available_error_type(&available_error_type); + if (rc) + return rc; + if (!(val & available_error_type)) + return -EINVAL; + } error_type = val; return 0; @@ -455,7 +601,6 @@ static int einj_check_table(struct acpi_table_einj *einj_tab) static int __init einj_init(void) { int rc; - u64 param_paddr; acpi_status status; struct dentry *fentry; struct apei_exec_context ctx; @@ -509,23 +654,30 @@ static int __init einj_init(void) rc = apei_exec_pre_map_gars(&ctx); if (rc) goto err_release; - if (param_extension) { - param_paddr = einj_get_parameter_address(); - if (param_paddr) { - einj_param = ioremap(param_paddr, sizeof(*einj_param)); - rc = -ENOMEM; - if (!einj_param) - goto err_unmap; - fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR, - einj_debug_dir, &error_param1); - if (!fentry) - goto err_unmap; - fentry = debugfs_create_x64("param2", S_IRUSR | S_IWUSR, - einj_debug_dir, &error_param2); - if (!fentry) - goto err_unmap; - } else - pr_warn(EINJ_PFX "Parameter extension is not supported.\n"); + + einj_param = einj_get_parameter_address(); + if ((param_extension || acpi5) && einj_param) { + fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR, + einj_debug_dir, &error_param1); + if (!fentry) + goto err_unmap; + fentry = debugfs_create_x64("param2", S_IRUSR | S_IWUSR, + einj_debug_dir, &error_param2); + if (!fentry) + goto err_unmap; + } + + if (vendor_dev[0]) { + vendor_blob.data = vendor_dev; + vendor_blob.size = strlen(vendor_dev); + fentry = debugfs_create_blob("vendor", S_IRUSR, + einj_debug_dir, &vendor_blob); + if (!fentry) + goto err_unmap; + fentry = debugfs_create_x32("vendor_flags", S_IRUSR | S_IWUSR, + einj_debug_dir, &vendor_flags); + if (!fentry) + goto err_unmap; } pr_info(EINJ_PFX "Error INJection is initialized.\n"); diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 7504bc99b29..f25d7efee63 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -228,7 +228,8 @@ enum acpi_einj_actions { ACPI_EINJ_EXECUTE_OPERATION = 5, ACPI_EINJ_CHECK_BUSY_STATUS = 6, ACPI_EINJ_GET_COMMAND_STATUS = 7, - ACPI_EINJ_ACTION_RESERVED = 8, /* 8 and greater are reserved */ + ACPI_EINJ_SET_ERROR_TYPE_WITH_ADDRESS = 8, + ACPI_EINJ_ACTION_RESERVED = 9, /* 9 and greater are reserved */ ACPI_EINJ_TRIGGER_ERROR = 0xFF /* Except for this value */ };