From 0b15841b0bbbde75134a777c543dc51fe6187108 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 15 Mar 2013 10:55:31 -0400 Subject: [PATCH 001/303] intel_idle: additional Haswell CPU-id There is an additional HSW CPU-id, 0x46, which has C-states exactly like CPU-id 0x45. Signed-off-by: Len Brown --- drivers/idle/intel_idle.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 5d6675013864..1a38dd7dfe4e 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -465,6 +465,7 @@ static const struct x86_cpu_id intel_idle_ids[] = { ICPU(0x3c, idle_cpu_hsw), ICPU(0x3f, idle_cpu_hsw), ICPU(0x45, idle_cpu_hsw), + ICPU(0x46, idle_cpu_hsw), {} }; MODULE_DEVICE_TABLE(x86cpu, intel_idle_ids); From 149c2319c6316d979de90ce2c8665e9c02d5927b Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 15 Mar 2013 10:58:02 -0400 Subject: [PATCH 002/303] tools/power turbostat: additional Haswell CPU-id There is an additional HSW CPU-id, 0x46, which has C-states exactly like CPU-id 0x45. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 6f3214ed4444..321e066a0753 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1421,6 +1421,7 @@ int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model) case 0x3C: /* HSW */ case 0x3F: /* HSW */ case 0x45: /* HSW */ + case 0x46: /* HSW */ return 1; case 0x2E: /* Nehalem-EX Xeon - Beckton */ case 0x2F: /* Westmere-EX Xeon - Eagleton */ @@ -1515,6 +1516,7 @@ void rapl_probe(unsigned int family, unsigned int model) case 0x3C: /* HSW */ case 0x3F: /* HSW */ case 0x45: /* HSW */ + case 0x46: /* HSW */ do_rapl = RAPL_PKG | RAPL_CORES | RAPL_GFX; break; case 0x2D: @@ -1754,6 +1756,7 @@ int is_snb(unsigned int family, unsigned int model) case 0x3C: /* HSW */ case 0x3F: /* HSW */ case 0x45: /* HSW */ + case 0x46: /* HSW */ return 1; } return 0; @@ -2276,7 +2279,7 @@ int main(int argc, char **argv) cmdline(argc, argv); if (verbose) - fprintf(stderr, "turbostat v3.2 February 11, 2013" + fprintf(stderr, "turbostat v3.3 March 15, 2013" " - Len Brown \n"); turbostat_init(); From 383efcd00053ec40023010ce5034bd702e7ab373 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 18 Mar 2013 12:22:34 -0700 Subject: [PATCH 003/303] sched: Convert BUG_ON()s in try_to_wake_up_local() to WARN_ON_ONCE()s try_to_wake_up_local() should only be invoked to wake up another task in the same runqueue and BUG_ON()s are used to enforce the rule. Missing try_to_wake_up_local() can stall workqueue execution but such stalls are likely to be finite either by another work item being queued or the one blocked getting unblocked. There's no reason to trigger BUG while holding rq lock crashing the whole system. Convert BUG_ON()s in try_to_wake_up_local() to WARN_ON_ONCE()s. Signed-off-by: Tejun Heo Acked-by: Steven Rostedt Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20130318192234.GD3042@htj.dyndns.org Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b7b03cd2d4cd..306943f531a3 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1488,8 +1488,10 @@ static void try_to_wake_up_local(struct task_struct *p) { struct rq *rq = task_rq(p); - BUG_ON(rq != this_rq()); - BUG_ON(p == current); + if (WARN_ON_ONCE(rq != this_rq()) || + WARN_ON_ONCE(p == current)) + return; + lockdep_assert_held(&rq->lock); if (!raw_spin_trylock(&p->pi_lock)) { From 0e48026ae7abf871e51eaa9183c81ab5bef4c267 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 19 Mar 2013 16:10:38 +0100 Subject: [PATCH 004/303] perf/x86: Fix uninitialized pt_regs in intel_pmu_drain_bts_buffer() This patch fixes an uninitialized pt_regs struct in drain BTS function. The pt_regs struct is propagated all the way to the code_get_segment() function from perf_instruction_pointer() and may get garbage. We cannot simply inherit the actual pt_regs from the interrupt because BTS must be flushed on context-switch or when the associated event is disabled. And there we do not have a pt_regs handy. Setting pt_regs to all zeroes may not be the best option but it is not clear what else to do given where the drain_bts_buffer() is called from. In V2, we move the memset() later in the code to avoid doing it when we end up returning early without doing the actual BTS processing. Also dropped the reg.val initialization because it is redundant with the memset() as suggested by PeterZ. Signed-off-by: Stephane Eranian Acked-by: Peter Zijlstra Cc: peterz@infradead.org Cc: sqazi@google.com Cc: ak@linux.intel.com Cc: jolsa@redhat.com Link: http://lkml.kernel.org/r/20130319151038.GA25439@quad Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_ds.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 826054a4f2ee..f71c9f09fd4b 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -314,10 +314,11 @@ int intel_pmu_drain_bts_buffer(void) if (top <= at) return 0; + memset(®s, 0, sizeof(regs)); + ds->bts_index = ds->bts_buffer_base; perf_sample_data_init(&data, 0, event->hw.last_period); - regs.ip = 0; /* * Prepare a generic sample, i.e. fill in the invariant fields. From dd9c086d9f507d02d5ba4d7c5eef4bb9518088b8 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Mon, 18 Mar 2013 14:33:28 +0100 Subject: [PATCH 005/303] perf: Fix ring_buffer perf_output_space() boundary calculation This patch fixes a flaw in perf_output_space(). In case the size of the space needed is bigger than the actual buffer size, there may be situations where the function would return true (i.e., there is space) when it should not. head > offset due to rounding of the masking logic. The problem can be tested by activating BTS on Intel processors. A BTS record can be as big as 16 pages. The following command fails: $ perf record -m 4 -c 1 -e branches:u my_test_program You will get a buffer corruption with this. Perf report won't be able to parse the perf.data. The fix is to first check that the requested space is smaller than the buffer size. If so, then the masking logic will work fine. If not, then there is no chance the record can be saved and it will be gracefully handled by upper code layers. [ In v2, we also make the logic for the writable more explicit by renaming it to rb->overwrite because it tells whether or not the buffer can overwrite its tail (suggested by PeterZ). ] Signed-off-by: Stephane Eranian Acked-by: Peter Zijlstra Cc: peterz@infradead.org Cc: jolsa@redhat.com Cc: fweisbec@gmail.com Link: http://lkml.kernel.org/r/20130318133327.GA3056@quad Signed-off-by: Ingo Molnar --- kernel/events/internal.h | 2 +- kernel/events/ring_buffer.c | 22 ++++++++++++++++++---- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/kernel/events/internal.h b/kernel/events/internal.h index d56a64c99a8b..eb675c4d59df 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -16,7 +16,7 @@ struct ring_buffer { int page_order; /* allocation order */ #endif int nr_pages; /* nr of data pages */ - int writable; /* are we writable */ + int overwrite; /* can overwrite itself */ atomic_t poll; /* POLL_ for wakeups */ diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 23cb34ff3973..97fddb09762b 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -18,12 +18,24 @@ static bool perf_output_space(struct ring_buffer *rb, unsigned long tail, unsigned long offset, unsigned long head) { - unsigned long mask; + unsigned long sz = perf_data_size(rb); + unsigned long mask = sz - 1; - if (!rb->writable) + /* + * check if user-writable + * overwrite : over-write its own tail + * !overwrite: buffer possibly drops events. + */ + if (rb->overwrite) return true; - mask = perf_data_size(rb) - 1; + /* + * verify that payload is not bigger than buffer + * otherwise masking logic may fail to detect + * the "not enough space" condition + */ + if ((head - offset) > sz) + return false; offset = (offset - tail) & mask; head = (head - tail) & mask; @@ -212,7 +224,9 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags) rb->watermark = max_size / 2; if (flags & RING_BUFFER_WRITABLE) - rb->writable = 1; + rb->overwrite = 0; + else + rb->overwrite = 1; atomic_set(&rb->refcount, 1); From 4cc3daaf3976bde5345718e86b67cace2f935a09 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 14 Jan 2013 20:48:55 +0000 Subject: [PATCH 006/303] ARM: tlbflush: remove ARMv3 support We no longer support any ARMv3 platforms, so remove the old tlbflushing code. Signed-off-by: Will Deacon --- arch/arm/include/asm/tlbflush.h | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h index 4db8c8820f0d..a223003b0f17 100644 --- a/arch/arm/include/asm/tlbflush.h +++ b/arch/arm/include/asm/tlbflush.h @@ -14,7 +14,6 @@ #include -#define TLB_V3_PAGE (1 << 0) #define TLB_V4_U_PAGE (1 << 1) #define TLB_V4_D_PAGE (1 << 2) #define TLB_V4_I_PAGE (1 << 3) @@ -22,7 +21,6 @@ #define TLB_V6_D_PAGE (1 << 5) #define TLB_V6_I_PAGE (1 << 6) -#define TLB_V3_FULL (1 << 8) #define TLB_V4_U_FULL (1 << 9) #define TLB_V4_D_FULL (1 << 10) #define TLB_V4_I_FULL (1 << 11) @@ -52,7 +50,6 @@ * ============= * * We have the following to choose from: - * v3 - ARMv3 * v4 - ARMv4 without write buffer * v4wb - ARMv4 with write buffer without I TLB flush entry instruction * v4wbi - ARMv4 with write buffer with I TLB flush entry instruction @@ -330,7 +327,6 @@ static inline void local_flush_tlb_all(void) if (tlb_flag(TLB_WB)) dsb(); - tlb_op(TLB_V3_FULL, "c6, c0, 0", zero); tlb_op(TLB_V4_U_FULL | TLB_V6_U_FULL, "c8, c7, 0", zero); tlb_op(TLB_V4_D_FULL | TLB_V6_D_FULL, "c8, c6, 0", zero); tlb_op(TLB_V4_I_FULL | TLB_V6_I_FULL, "c8, c5, 0", zero); @@ -351,9 +347,8 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm) if (tlb_flag(TLB_WB)) dsb(); - if (possible_tlb_flags & (TLB_V3_FULL|TLB_V4_U_FULL|TLB_V4_D_FULL|TLB_V4_I_FULL)) { + if (possible_tlb_flags & (TLB_V4_U_FULL|TLB_V4_D_FULL|TLB_V4_I_FULL)) { if (cpumask_test_cpu(get_cpu(), mm_cpumask(mm))) { - tlb_op(TLB_V3_FULL, "c6, c0, 0", zero); tlb_op(TLB_V4_U_FULL, "c8, c7, 0", zero); tlb_op(TLB_V4_D_FULL, "c8, c6, 0", zero); tlb_op(TLB_V4_I_FULL, "c8, c5, 0", zero); @@ -385,9 +380,8 @@ local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) if (tlb_flag(TLB_WB)) dsb(); - if (possible_tlb_flags & (TLB_V3_PAGE|TLB_V4_U_PAGE|TLB_V4_D_PAGE|TLB_V4_I_PAGE|TLB_V4_I_FULL) && + if (possible_tlb_flags & (TLB_V4_U_PAGE|TLB_V4_D_PAGE|TLB_V4_I_PAGE|TLB_V4_I_FULL) && cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) { - tlb_op(TLB_V3_PAGE, "c6, c0, 0", uaddr); tlb_op(TLB_V4_U_PAGE, "c8, c7, 1", uaddr); tlb_op(TLB_V4_D_PAGE, "c8, c6, 1", uaddr); tlb_op(TLB_V4_I_PAGE, "c8, c5, 1", uaddr); @@ -418,7 +412,6 @@ static inline void local_flush_tlb_kernel_page(unsigned long kaddr) if (tlb_flag(TLB_WB)) dsb(); - tlb_op(TLB_V3_PAGE, "c6, c0, 0", kaddr); tlb_op(TLB_V4_U_PAGE, "c8, c7, 1", kaddr); tlb_op(TLB_V4_D_PAGE, "c8, c6, 1", kaddr); tlb_op(TLB_V4_I_PAGE, "c8, c5, 1", kaddr); From 82d9b0d0c6c4cbd5d0022d7df5e6bf071a9eb6c7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 15 Jan 2013 12:07:40 +0000 Subject: [PATCH 007/303] ARM: cache: remove ARMv3 support code This is only used by 740t, which is a v4 core and (by my reading of the datasheet for the CPU) ignores CRm for the cp15 cache flush operation, making the v4 cache implementation in cache-v4.S sufficient for this CPU. Tested with 740T core-tile on Integrator/AP baseboard. Acked-by: Hyok S. Choi Acked-by: Greg Ungerer Signed-off-by: Will Deacon --- arch/arm/include/asm/glue-cache.h | 8 -- arch/arm/mm/Kconfig | 5 +- arch/arm/mm/Makefile | 1 - arch/arm/mm/cache-v3.S | 137 ------------------------------ arch/arm/mm/proc-arm740.S | 2 +- 5 files changed, 2 insertions(+), 151 deletions(-) delete mode 100644 arch/arm/mm/cache-v3.S diff --git a/arch/arm/include/asm/glue-cache.h b/arch/arm/include/asm/glue-cache.h index cca9f15704ed..ea289e1435e7 100644 --- a/arch/arm/include/asm/glue-cache.h +++ b/arch/arm/include/asm/glue-cache.h @@ -19,14 +19,6 @@ #undef _CACHE #undef MULTI_CACHE -#if defined(CONFIG_CPU_CACHE_V3) -# ifdef _CACHE -# define MULTI_CACHE 1 -# else -# define _CACHE v3 -# endif -#endif - #if defined(CONFIG_CPU_CACHE_V4) # ifdef _CACHE # define MULTI_CACHE 1 diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 025d17328730..4045c4931a30 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -43,7 +43,7 @@ config CPU_ARM740T depends on !MMU select CPU_32v4T select CPU_ABRT_LV4T - select CPU_CACHE_V3 # although the core is v4t + select CPU_CACHE_V4 select CPU_CP15_MPU select CPU_PABRT_LEGACY help @@ -469,9 +469,6 @@ config CPU_PABRT_V7 bool # The cache model -config CPU_CACHE_V3 - bool - config CPU_CACHE_V4 bool diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index 4e333fa2756f..9e51be96f635 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -33,7 +33,6 @@ obj-$(CONFIG_CPU_PABRT_LEGACY) += pabort-legacy.o obj-$(CONFIG_CPU_PABRT_V6) += pabort-v6.o obj-$(CONFIG_CPU_PABRT_V7) += pabort-v7.o -obj-$(CONFIG_CPU_CACHE_V3) += cache-v3.o obj-$(CONFIG_CPU_CACHE_V4) += cache-v4.o obj-$(CONFIG_CPU_CACHE_V4WT) += cache-v4wt.o obj-$(CONFIG_CPU_CACHE_V4WB) += cache-v4wb.o diff --git a/arch/arm/mm/cache-v3.S b/arch/arm/mm/cache-v3.S deleted file mode 100644 index 8a3fadece8d3..000000000000 --- a/arch/arm/mm/cache-v3.S +++ /dev/null @@ -1,137 +0,0 @@ -/* - * linux/arch/arm/mm/cache-v3.S - * - * Copyright (C) 1997-2002 Russell king - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include -#include -#include -#include "proc-macros.S" - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. - */ -ENTRY(v3_flush_icache_all) - mov pc, lr -ENDPROC(v3_flush_icache_all) - -/* - * flush_user_cache_all() - * - * Invalidate all cache entries in a particular address - * space. - * - * - mm - mm_struct describing address space - */ -ENTRY(v3_flush_user_cache_all) - /* FALLTHROUGH */ -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. - */ -ENTRY(v3_flush_kern_cache_all) - /* FALLTHROUGH */ - -/* - * flush_user_cache_range(start, end, flags) - * - * Invalidate a range of cache entries in the specified - * address space. - * - * - start - start address (may not be aligned) - * - end - end address (exclusive, may not be aligned) - * - flags - vma_area_struct flags describing address space - */ -ENTRY(v3_flush_user_cache_range) - mov ip, #0 - mcreq p15, 0, ip, c7, c0, 0 @ flush ID cache - mov pc, lr - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(v3_coherent_kern_range) - /* FALLTHROUGH */ - -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(v3_coherent_user_range) - mov r0, #0 - mov pc, lr - -/* - * flush_kern_dcache_area(void *page, size_t size) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache - * - * - addr - kernel address - * - size - region size - */ -ENTRY(v3_flush_kern_dcache_area) - /* FALLTHROUGH */ - -/* - * dma_flush_range(start, end) - * - * Clean and invalidate the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(v3_dma_flush_range) - mov r0, #0 - mcr p15, 0, r0, c7, c0, 0 @ flush ID cache - mov pc, lr - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(v3_dma_unmap_area) - teq r2, #DMA_TO_DEVICE - bne v3_dma_flush_range - /* FALLTHROUGH */ - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(v3_dma_map_area) - mov pc, lr -ENDPROC(v3_dma_unmap_area) -ENDPROC(v3_dma_map_area) - - .globl v3_flush_kern_cache_louis - .equ v3_flush_kern_cache_louis, v3_flush_kern_cache_all - - __INITDATA - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions v3 diff --git a/arch/arm/mm/proc-arm740.S b/arch/arm/mm/proc-arm740.S index dc5de5d53f20..2088234978c4 100644 --- a/arch/arm/mm/proc-arm740.S +++ b/arch/arm/mm/proc-arm740.S @@ -145,5 +145,5 @@ __arm740_proc_info: .long arm740_processor_functions .long 0 .long 0 - .long v3_cache_fns @ cache model + .long v4_cache_fns @ cache model .size __arm740_proc_info, . - __arm740_proc_info From 3ef52f2a00efc5f83ae6d40e55cae96ce275893f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 22 Jan 2013 10:37:51 +0000 Subject: [PATCH 008/303] ARM: mm: fix numerous hideous errors in proc-arm740.S The setup code in proc-arm740.S is completely broken and, as far as I can tell, always has been. I was >this< close to ripping it out, when a 740t core-tile materialised in the office, so I've had a crack at fixing things up: - Fix the ram/flash area calculations so that we actually set the condition flags before testing them... - Fix the proc_info structure so that __cpu_io_mmu_flags are defined as 0, placing the __cpu_flush pointer at the correct offset - Re-number the registers used during __arm740_setup so that we don't clobber the machine ID et al - Advertise Thumb support via the hwcaps, since 740T is the only 740 implementation. Acked-by: Hyok S. Choi Signed-off-by: Will Deacon --- arch/arm/mm/proc-arm740.S | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/arch/arm/mm/proc-arm740.S b/arch/arm/mm/proc-arm740.S index 2088234978c4..fde2d2a794cf 100644 --- a/arch/arm/mm/proc-arm740.S +++ b/arch/arm/mm/proc-arm740.S @@ -77,24 +77,27 @@ __arm740_setup: mcr p15, 0, r0, c6, c0 @ set area 0, default ldr r0, =(CONFIG_DRAM_BASE & 0xFFFFF000) @ base[31:12] of RAM - ldr r1, =(CONFIG_DRAM_SIZE >> 12) @ size of RAM (must be >= 4KB) - mov r2, #10 @ 11 is the minimum (4KB) -1: add r2, r2, #1 @ area size *= 2 - mov r1, r1, lsr #1 + ldr r3, =(CONFIG_DRAM_SIZE >> 12) @ size of RAM (must be >= 4KB) + mov r4, #10 @ 11 is the minimum (4KB) +1: add r4, r4, #1 @ area size *= 2 + movs r3, r3, lsr #1 bne 1b @ count not zero r-shift - orr r0, r0, r2, lsl #1 @ the area register value + orr r0, r0, r4, lsl #1 @ the area register value orr r0, r0, #1 @ set enable bit mcr p15, 0, r0, c6, c1 @ set area 1, RAM ldr r0, =(CONFIG_FLASH_MEM_BASE & 0xFFFFF000) @ base[31:12] of FLASH - ldr r1, =(CONFIG_FLASH_SIZE >> 12) @ size of FLASH (must be >= 4KB) - mov r2, #10 @ 11 is the minimum (4KB) -1: add r2, r2, #1 @ area size *= 2 - mov r1, r1, lsr #1 + ldr r3, =(CONFIG_FLASH_SIZE >> 12) @ size of FLASH (must be >= 4KB) + cmp r3, #0 + moveq r0, #0 + beq 2f + mov r4, #10 @ 11 is the minimum (4KB) +1: add r4, r4, #1 @ area size *= 2 + movs r3, r3, lsr #1 bne 1b @ count not zero r-shift - orr r0, r0, r2, lsl #1 @ the area register value + orr r0, r0, r4, lsl #1 @ the area register value orr r0, r0, #1 @ set enable bit - mcr p15, 0, r0, c6, c2 @ set area 2, ROM/FLASH +2: mcr p15, 0, r0, c6, c2 @ set area 2, ROM/FLASH mov r0, #0x06 mcr p15, 0, r0, c2, c0 @ Region 1&2 cacheable @@ -137,10 +140,11 @@ __arm740_proc_info: .long 0x41807400 .long 0xfffffff0 .long 0 + .long 0 b __arm740_setup .long cpu_arch_name .long cpu_elf_name - .long HWCAP_SWP | HWCAP_HALF | HWCAP_26BIT + .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_26BIT .long cpu_arm740_name .long arm740_processor_functions .long 0 From 794fe85da484353dedcb5dc6da14d923d0645fc3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 22 Jan 2013 19:11:38 +0000 Subject: [PATCH 009/303] ARM: mm: remove broken condition check for v4 flushing There's no point having a conditional cache flush if we don't know the state of the condition beforehand. This patch makes the cacheflush in v4_flush_user_cache_range unconditional. signed-off-by: will deacon --- arch/arm/mm/cache-v4.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mm/cache-v4.S b/arch/arm/mm/cache-v4.S index 43e5d77be677..a7ba68f59f0c 100644 --- a/arch/arm/mm/cache-v4.S +++ b/arch/arm/mm/cache-v4.S @@ -58,7 +58,7 @@ ENTRY(v4_flush_kern_cache_all) ENTRY(v4_flush_user_cache_range) #ifdef CONFIG_CPU_CP15 mov ip, #0 - mcreq p15, 0, ip, c7, c7, 0 @ flush ID cache + mcr p15, 0, ip, c7, c7, 0 @ flush ID cache mov pc, lr #else /* FALLTHROUGH */ From d455bac223d6f9858b29a29272756243ec3d3904 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 22 Jan 2013 11:00:54 +0000 Subject: [PATCH 010/303] ARM: modules: don't export cpu_set_pte_ext when !MMU cpu_set_pte_ext is only guaranteed to be defined when CONFIG_MMU, so don't export it to modules otherwise. Signed-off-by: Will Deacon --- arch/arm/mm/proc-syms.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mm/proc-syms.c b/arch/arm/mm/proc-syms.c index 3e6210b4d6d4..054b491ff764 100644 --- a/arch/arm/mm/proc-syms.c +++ b/arch/arm/mm/proc-syms.c @@ -17,7 +17,9 @@ #ifndef MULTI_CPU EXPORT_SYMBOL(cpu_dcache_clean_area); +#ifdef CONFIG_MMU EXPORT_SYMBOL(cpu_set_pte_ext); +#endif #else EXPORT_SYMBOL(processor); #endif From 0eaa6cca1f75e12e4f5ec62cbe887330fe3b5fe9 Mon Sep 17 00:00:00 2001 From: Joonyoung Shim Date: Tue, 26 Mar 2013 14:41:05 +0900 Subject: [PATCH 011/303] ASoC: core: Fix to check return value of snd_soc_update_bits_locked() It can be 0 or 1 return value of snd_soc_update_bits_locked() when it is success. So just check return value is negative. Signed-off-by: Joonyoung Shim Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/soc-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 507d251916af..ff4b45a5d796 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -2963,7 +2963,7 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol, val = val << shift; ret = snd_soc_update_bits_locked(codec, reg, val_mask, val); - if (ret != 0) + if (ret < 0) return ret; if (snd_soc_volsw_is_stereo(mc)) { From 904c680c7bf016a8619a045850937427f8d7368c Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 26 Mar 2013 11:33:16 -0600 Subject: [PATCH 012/303] vfio-pci: Fix possible integer overflow The VFIO_DEVICE_SET_IRQS ioctl takes a start and count parameter, both of which are unsigned. We attempt to bounds check these, but fail to account for the case where start is a very large number, allowing start + count to wrap back into the valid range. Bounds check both start and start + count. Reported-by: Dan Carpenter Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 8189cb6a86af..7abc5c81af2c 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -346,6 +346,7 @@ static long vfio_pci_ioctl(void *device_data, if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) { size_t size; + int max = vfio_pci_get_irq_count(vdev, hdr.index); if (hdr.flags & VFIO_IRQ_SET_DATA_BOOL) size = sizeof(uint8_t); @@ -355,7 +356,7 @@ static long vfio_pci_ioctl(void *device_data, return -EINVAL; if (hdr.argsz - minsz < hdr.count * size || - hdr.count > vfio_pci_get_irq_count(vdev, hdr.index)) + hdr.start >= max || hdr.start + hdr.count > max) return -EINVAL; data = memdup_user((void __user *)(arg + minsz), From bad9a43a20372a3b41e15e8aa03b918c76f360f0 Mon Sep 17 00:00:00 2001 From: Abhijeet Dharmapurikar Date: Tue, 19 Mar 2013 16:05:49 -0700 Subject: [PATCH 013/303] irqchip: gic: fix irq_trigger return The genirq layer expects a 0 in case that the retrigger function is not able to resend the irq in hardware, but the code is returning -ENXIO. Fix it. [ tglx: Reworked comment and changelog ] Signed-off-by: Abhijeet Dharmapurikar Signed-off-by: Stephen Boyd Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1363734349-32635-1-git-send-email-sboyd@codeaurora.org Signed-off-by: Thomas Gleixner --- drivers/irqchip/irq-gic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index a32e0d5aa45f..fc6aebf1e4b2 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -236,7 +236,8 @@ static int gic_retrigger(struct irq_data *d) if (gic_arch_extn.irq_retrigger) return gic_arch_extn.irq_retrigger(d); - return -ENXIO; + /* the genirq layer expects 0 if we can't retrigger in hardware */ + return 0; } #ifdef CONFIG_SMP From 84cc8fd2fe65866e49d70b38b3fdf7219dd92fe0 Mon Sep 17 00:00:00 2001 From: Michael Bohan Date: Tue, 19 Mar 2013 19:19:25 -0700 Subject: [PATCH 014/303] hrtimer: Don't reinitialize a cpu_base lock on CPU_UP The current code makes the assumption that a cpu_base lock won't be held if the CPU corresponding to that cpu_base is offline, which isn't always true. If a hrtimer is not queued, then it will not be migrated by migrate_hrtimers() when a CPU is offlined. Therefore, the hrtimer's cpu_base may still point to a CPU which has subsequently gone offline if the timer wasn't enqueued at the time the CPU went down. Normally this wouldn't be a problem, but a cpu_base's lock is blindly reinitialized each time a CPU is brought up. If a CPU is brought online during the period that another thread is performing a hrtimer operation on a stale hrtimer, then the lock will be reinitialized under its feet, and a SPIN_BUG() like the following will be observed: <0>[ 28.082085] BUG: spinlock already unlocked on CPU#0, swapper/0/0 <0>[ 28.087078] lock: 0xc4780b40, value 0x0 .magic: dead4ead, .owner: /-1, .owner_cpu: -1 <4>[ 42.451150] [] (unwind_backtrace+0x0/0x120) from [] (do_raw_spin_unlock+0x44/0xdc) <4>[ 42.460430] [] (do_raw_spin_unlock+0x44/0xdc) from [] (_raw_spin_unlock+0x8/0x30) <4>[ 42.469632] [] (_raw_spin_unlock+0x8/0x30) from [] (__hrtimer_start_range_ns+0x1e4/0x4f8) <4>[ 42.479521] [] (__hrtimer_start_range_ns+0x1e4/0x4f8) from [] (hrtimer_start+0x20/0x28) <4>[ 42.489247] [] (hrtimer_start+0x20/0x28) from [] (rcu_idle_enter_common+0x1ac/0x320) <4>[ 42.498709] [] (rcu_idle_enter_common+0x1ac/0x320) from [] (rcu_idle_enter+0xa0/0xb8) <4>[ 42.508259] [] (rcu_idle_enter+0xa0/0xb8) from [] (cpu_idle+0x24/0xf0) <4>[ 42.516503] [] (cpu_idle+0x24/0xf0) from [] (rest_init+0x88/0xa0) <4>[ 42.524319] [] (rest_init+0x88/0xa0) from [] (start_kernel+0x3d0/0x434) As an example, this particular crash occurred when hrtimer_start() was executed on CPU #0. The code locked the hrtimer's current cpu_base corresponding to CPU #1. CPU #0 then tried to switch the hrtimer's cpu_base to an optimal CPU which was online. In this case, it selected the cpu_base corresponding to CPU #3. Before it could proceed, CPU #1 came online and reinitialized the spinlock corresponding to its cpu_base. Thus now CPU #0 held a lock which was reinitialized. When CPU #0 finally ended up unlocking the old cpu_base corresponding to CPU #1 so that it could switch to CPU #3, we hit this SPIN_BUG() above while in switch_hrtimer_base(). CPU #0 CPU #1 ---- ---- ... hrtimer_start() lock_hrtimer_base(base #1) ... init_hrtimers_cpu() switch_hrtimer_base() ... ... raw_spin_lock_init(&cpu_base->lock) raw_spin_unlock(&cpu_base->lock) ... Solve this by statically initializing the lock. Signed-off-by: Michael Bohan Link: http://lkml.kernel.org/r/1363745965-23475-1-git-send-email-mbohan@codeaurora.org Cc: stable@vger.kernel.org Signed-off-by: Thomas Gleixner --- kernel/hrtimer.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index cc47812d3feb..14be27feda49 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -63,6 +63,7 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = { + .lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock), .clock_base = { { @@ -1642,8 +1643,6 @@ static void __cpuinit init_hrtimers_cpu(int cpu) struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu); int i; - raw_spin_lock_init(&cpu_base->lock); - for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { cpu_base->clock_base[i].cpu_base = cpu_base; timerqueue_init_head(&cpu_base->clock_base[i].active); From fffe01f7a768d07cc50ace71abe28fbf2f786a43 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Tue, 26 Mar 2013 17:25:54 -0400 Subject: [PATCH 015/303] PCI: Add PCI ROM helper for platform-provided ROM images MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It turns out that some UEFI systems provide apparently an apparently valid PCI ROM BAR that turns out to contain garbage, so the attempt in 547b52463 to prefer the ROM from the BAR actually breaks a different set of machines. As Linus pointed out, the graphics drivers are probably in the best position to make this judgement, so this basically reverts 547b52463 and f9a37be0f and adds a new helper function. Followup patches will add support to nouveau and radeon for probing this ROM source if they can't find a ROM from some other source. [bhelgaas: added reporter and bugzilla pointers, s/f4eb5ff05/547b52463] Reference: https://bugzilla.redhat.com/show_bug.cgi?id=927451 Reference: http://lkml.kernel.org/r/kg69ef$vdb$1@ger.gmane.org Reported-by: Mantas Mikulėnas Reported-by: Chris Murphy Signed-off-by: Matthew Garrett Signed-off-by: Bjorn Helgaas --- drivers/pci/rom.c | 67 +++++++++++++++++++++------------------------ include/linux/pci.h | 1 + 2 files changed, 32 insertions(+), 36 deletions(-) diff --git a/drivers/pci/rom.c b/drivers/pci/rom.c index b41ac7756a4b..c5d0a08a8747 100644 --- a/drivers/pci/rom.c +++ b/drivers/pci/rom.c @@ -100,27 +100,6 @@ size_t pci_get_rom_size(struct pci_dev *pdev, void __iomem *rom, size_t size) return min((size_t)(image - rom), size); } -static loff_t pci_find_rom(struct pci_dev *pdev, size_t *size) -{ - struct resource *res = &pdev->resource[PCI_ROM_RESOURCE]; - loff_t start; - - /* assign the ROM an address if it doesn't have one */ - if (res->parent == NULL && pci_assign_resource(pdev, PCI_ROM_RESOURCE)) - return 0; - start = pci_resource_start(pdev, PCI_ROM_RESOURCE); - *size = pci_resource_len(pdev, PCI_ROM_RESOURCE); - - if (*size == 0) - return 0; - - /* Enable ROM space decodes */ - if (pci_enable_rom(pdev)) - return 0; - - return start; -} - /** * pci_map_rom - map a PCI ROM to kernel space * @pdev: pointer to pci device struct @@ -135,7 +114,7 @@ static loff_t pci_find_rom(struct pci_dev *pdev, size_t *size) void __iomem *pci_map_rom(struct pci_dev *pdev, size_t *size) { struct resource *res = &pdev->resource[PCI_ROM_RESOURCE]; - loff_t start = 0; + loff_t start; void __iomem *rom; /* @@ -154,21 +133,21 @@ void __iomem *pci_map_rom(struct pci_dev *pdev, size_t *size) return (void __iomem *)(unsigned long) pci_resource_start(pdev, PCI_ROM_RESOURCE); } else { - start = pci_find_rom(pdev, size); + /* assign the ROM an address if it doesn't have one */ + if (res->parent == NULL && + pci_assign_resource(pdev,PCI_ROM_RESOURCE)) + return NULL; + start = pci_resource_start(pdev, PCI_ROM_RESOURCE); + *size = pci_resource_len(pdev, PCI_ROM_RESOURCE); + if (*size == 0) + return NULL; + + /* Enable ROM space decodes */ + if (pci_enable_rom(pdev)) + return NULL; } } - /* - * Some devices may provide ROMs via a source other than the BAR - */ - if (!start && pdev->rom && pdev->romlen) { - *size = pdev->romlen; - return phys_to_virt(pdev->rom); - } - - if (!start) - return NULL; - rom = ioremap(start, *size); if (!rom) { /* restore enable if ioremap fails */ @@ -202,8 +181,7 @@ void pci_unmap_rom(struct pci_dev *pdev, void __iomem *rom) if (res->flags & (IORESOURCE_ROM_COPY | IORESOURCE_ROM_BIOS_COPY)) return; - if (!pdev->rom || !pdev->romlen) - iounmap(rom); + iounmap(rom); /* Disable again before continuing, leave enabled if pci=rom */ if (!(res->flags & (IORESOURCE_ROM_ENABLE | IORESOURCE_ROM_SHADOW))) @@ -227,7 +205,24 @@ void pci_cleanup_rom(struct pci_dev *pdev) } } +/** + * pci_platform_rom - provides a pointer to any ROM image provided by the + * platform + * @pdev: pointer to pci device struct + * @size: pointer to receive size of pci window over ROM + */ +void __iomem *pci_platform_rom(struct pci_dev *pdev, size_t *size) +{ + if (pdev->rom && pdev->romlen) { + *size = pdev->romlen; + return phys_to_virt((phys_addr_t)pdev->rom); + } + + return NULL; +} + EXPORT_SYMBOL(pci_map_rom); EXPORT_SYMBOL(pci_unmap_rom); EXPORT_SYMBOL_GPL(pci_enable_rom); EXPORT_SYMBOL_GPL(pci_disable_rom); +EXPORT_SYMBOL(pci_platform_rom); diff --git a/include/linux/pci.h b/include/linux/pci.h index 2461033a7987..710067f3618c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -916,6 +916,7 @@ void pci_disable_rom(struct pci_dev *pdev); void __iomem __must_check *pci_map_rom(struct pci_dev *pdev, size_t *size); void pci_unmap_rom(struct pci_dev *pdev, void __iomem *rom); size_t pci_get_rom_size(struct pci_dev *pdev, void __iomem *rom, size_t size); +void __iomem __must_check *pci_platform_rom(struct pci_dev *pdev, size_t *size); /* Power management related routines */ int pci_save_state(struct pci_dev *dev); From a9341512c372fcc628dabc619898d910a06c54bc Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Tue, 26 Mar 2013 15:48:38 -0700 Subject: [PATCH 016/303] openvswitch: Preallocate reply skb in ovs_vport_cmd_set(). Allocation of the Netlink notification skb can potentially fail after changing vport configuration. In general, we try to avoid this by undoing any change we made but that is difficult for existing objects. This avoids the problem by preallocating the buffer (which is fixed size). Signed-off-by: Jesse Gross --- net/openvswitch/datapath.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index a4b724708a1a..6980c3e6f066 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1593,10 +1593,8 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, return ERR_PTR(-ENOMEM); retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd); - if (retval < 0) { - kfree_skb(skb); - return ERR_PTR(retval); - } + BUG_ON(retval < 0); + return skb; } @@ -1726,24 +1724,32 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) err = -EINVAL; + reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!reply) { + err = -ENOMEM; + goto exit_unlock; + } + if (!err && a[OVS_VPORT_ATTR_OPTIONS]) err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]); if (err) - goto exit_unlock; + goto exit_free; + if (a[OVS_VPORT_ATTR_UPCALL_PID]) vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); - reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, - OVS_VPORT_CMD_NEW); - if (IS_ERR(reply)) { - netlink_set_err(sock_net(skb->sk)->genl_sock, 0, - ovs_dp_vport_multicast_group.id, PTR_ERR(reply)); - goto exit_unlock; - } + err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, + info->snd_seq, 0, OVS_VPORT_CMD_NEW); + BUG_ON(err < 0); genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); + rtnl_unlock(); + return 0; + +exit_free: + kfree_skb(reply); exit_unlock: rtnl_unlock(); return err; From d3e1101c9b75574e68380b5cb10c9395fd8855de Mon Sep 17 00:00:00 2001 From: Hong Zhiguo Date: Wed, 27 Mar 2013 20:41:17 +0800 Subject: [PATCH 017/303] openvswitch: correct an invalid BUG_ON table->count is uint32_t Signed-off-by: Hong Zhiguo Signed-off-by: Jesse Gross --- net/openvswitch/flow.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index fe0e4215c73d..67a2b783fe70 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -795,9 +795,9 @@ void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow) void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) { + BUG_ON(table->count == 0); hlist_del_rcu(&flow->hash_node[table->node_ver]); table->count--; - BUG_ON(table->count < 0); } /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ From fa40ef208c955bfe21f53913f51f297ac3237e95 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 27 Mar 2013 16:39:01 +0000 Subject: [PATCH 018/303] ASoC: compress: Cancel delayed power down if needed When a new stream is being opened it is necessary to cancel any delayed power down of the audio. [Fixed unused variable -- broonie] Signed-off-by: Charles Keepax Signed-off-by: Mark Brown --- sound/soc/soc-compress.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/sound/soc/soc-compress.c b/sound/soc/soc-compress.c index b5b3db71e253..ed0bfb0ddb96 100644 --- a/sound/soc/soc-compress.c +++ b/sound/soc/soc-compress.c @@ -211,19 +211,27 @@ static int soc_compr_set_params(struct snd_compr_stream *cstream, if (platform->driver->compr_ops && platform->driver->compr_ops->set_params) { ret = platform->driver->compr_ops->set_params(cstream, params); if (ret < 0) - goto out; + goto err; } if (rtd->dai_link->compr_ops && rtd->dai_link->compr_ops->set_params) { ret = rtd->dai_link->compr_ops->set_params(cstream); if (ret < 0) - goto out; + goto err; } snd_soc_dapm_stream_event(rtd, SNDRV_PCM_STREAM_PLAYBACK, SND_SOC_DAPM_STREAM_START); -out: + /* cancel any delayed stream shutdown that is pending */ + rtd->pop_wait = 0; + mutex_unlock(&rtd->pcm_mutex); + + cancel_delayed_work_sync(&rtd->delayed_work); + + return ret; + +err: mutex_unlock(&rtd->pcm_mutex); return ret; } From 71196a26d85be2aae2e587dfd3c3190a651133c6 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 27 Mar 2013 02:20:45 +0000 Subject: [PATCH 019/303] sparc:remove unused declaration smp_boot_cpus() smp_boot_cpus() was replaced smp_prepare_cpus() long ago, and it no longer needed, so delete it. Signed-off-by: Kefeng Wang Acked-by: Sam Ravnborg Signed-off-by: David S. Miller --- arch/sparc/include/asm/smp_32.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/sparc/include/asm/smp_32.h b/arch/sparc/include/asm/smp_32.h index b73da3c5f10a..68503d3d1b9e 100644 --- a/arch/sparc/include/asm/smp_32.h +++ b/arch/sparc/include/asm/smp_32.h @@ -46,7 +46,6 @@ void sun4m_init_smp(void); void sun4d_init_smp(void); void smp_callin(void); -void smp_boot_cpus(void); void smp_store_cpu_info(int); void smp_resched_interrupt(void); From 54df2db36c93bbb8c757f172d97c577040de6abb Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 29 Mar 2013 03:44:43 +0000 Subject: [PATCH 020/303] sparc/srmmu: clear trailing edge of bitmap properly srmmu_nocache_bitmap is cleared by bit_map_init(). But bit_map_init() attempts to clear by memset(), so it can't clear the trailing edge of bitmap properly on big-endian architecture if the number of bits is not a multiple of BITS_PER_LONG. Actually, the number of bits in srmmu_nocache_bitmap is not always a multiple of BITS_PER_LONG. It is calculated as below: bitmap_bits = srmmu_nocache_size >> SRMMU_NOCACHE_BITMAP_SHIFT; srmmu_nocache_size is decided proportionally by the amount of system RAM and it is rounded to a multiple of PAGE_SIZE. SRMMU_NOCACHE_BITMAP_SHIFT is defined as (PAGE_SHIFT - 4). So it can only be said that bitmap_bits is a multiple of 16. This fixes the problem by using bitmap_clear() instead of memset() in bit_map_init() and this also uses BITS_TO_LONGS() to calculate correct size at bitmap allocation time. Signed-off-by: Akinobu Mita Cc: "David S. Miller" Cc: sparclinux@vger.kernel.org Signed-off-by: David S. Miller --- arch/sparc/lib/bitext.c | 6 +----- arch/sparc/mm/srmmu.c | 4 +++- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/sparc/lib/bitext.c b/arch/sparc/lib/bitext.c index 48d00e72ce15..8ec4e9c0251a 100644 --- a/arch/sparc/lib/bitext.c +++ b/arch/sparc/lib/bitext.c @@ -119,11 +119,7 @@ void bit_map_clear(struct bit_map *t, int offset, int len) void bit_map_init(struct bit_map *t, unsigned long *map, int size) { - - if ((size & 07) != 0) - BUG(); - memset(map, 0, size>>3); - + bitmap_zero(map, size); memset(t, 0, sizeof *t); spin_lock_init(&t->lock); t->map = map; diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index c38bb72e3e80..036c2797dece 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -280,7 +280,9 @@ static void __init srmmu_nocache_init(void) SRMMU_NOCACHE_ALIGN_MAX, 0UL); memset(srmmu_nocache_pool, 0, srmmu_nocache_size); - srmmu_nocache_bitmap = __alloc_bootmem(bitmap_bits >> 3, SMP_CACHE_BYTES, 0UL); + srmmu_nocache_bitmap = + __alloc_bootmem(BITS_TO_LONGS(bitmap_bits) * sizeof(long), + SMP_CACHE_BYTES, 0UL); bit_map_init(&srmmu_nocache_map, srmmu_nocache_bitmap, bitmap_bits); srmmu_swapper_pg_dir = __srmmu_get_nocache(SRMMU_PGD_TABLE_SIZE, SRMMU_PGD_TABLE_SIZE); From 9a0ac1b6af111f2fa0f2a39e9bbbebb74da7d26a Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 29 Mar 2013 03:44:44 +0000 Subject: [PATCH 021/303] sparc/iommu: fix typo s/265KB/256KB/ IOMMU_NPTES is 64K PTEs, so the size is 256KB (= 64K * sizeof(iopte_t)) Signed-off-by: Akinobu Mita Cc: "David S. Miller" Cc: sparclinux@vger.kernel.org Signed-off-by: David S. Miller --- arch/sparc/mm/iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c index 0f4f7191fbba..28f96f27c768 100644 --- a/arch/sparc/mm/iommu.c +++ b/arch/sparc/mm/iommu.c @@ -34,7 +34,7 @@ #define IOMMU_RNGE IOMMU_RNGE_256MB #define IOMMU_START 0xF0000000 #define IOMMU_WINSIZE (256*1024*1024U) -#define IOMMU_NPTES (IOMMU_WINSIZE/PAGE_SIZE) /* 64K PTEs, 265KB */ +#define IOMMU_NPTES (IOMMU_WINSIZE/PAGE_SIZE) /* 64K PTEs, 256KB */ #define IOMMU_ORDER 6 /* 4096 * (1<<6) */ /* srmmu.c */ From bf3aece8af91eabea2cfb2cbe528abc394695163 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Sat, 30 Mar 2013 00:28:26 +0000 Subject: [PATCH 022/303] sparc:cleanup unused code in smp_32.h After genirq and generic clockevent support at sparc32, smp4m_irq_rotate(), prof_multiplier() and prof_counter() are no longer used and should be removed. Find more info from commit 6baa9b20 & 62f08283. Signed-off-by: Kefeng Wang Acked-by: Sam Ravnborg Signed-off-by: David S. Miller --- arch/sparc/include/asm/smp_32.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/sparc/include/asm/smp_32.h b/arch/sparc/include/asm/smp_32.h index 68503d3d1b9e..3c8917f054de 100644 --- a/arch/sparc/include/asm/smp_32.h +++ b/arch/sparc/include/asm/smp_32.h @@ -36,7 +36,6 @@ typedef void (*smpfunc_t)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); void cpu_panic(void); -extern void smp4m_irq_rotate(int cpu); /* * General functions that each host system must provide. @@ -106,9 +105,6 @@ extern int hard_smp_processor_id(void); #define raw_smp_processor_id() (current_thread_info()->cpu) -#define prof_multiplier(__cpu) cpu_data(__cpu).multiplier -#define prof_counter(__cpu) cpu_data(__cpu).counter - void smp_setup_cpu_possible_map(void); #endif /* !(__ASSEMBLY__) */ From a2d34dd41212032c03e77bc30c2023725def841a Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sat, 30 Mar 2013 11:44:22 +0000 Subject: [PATCH 023/303] sparc: use generic headers Use "generic-y" to add generic headers where possible Signed-off-by: Sam Ravnborg Signed-off-by: David S. Miller --- arch/sparc/include/asm/Kbuild | 4 ++++ arch/sparc/include/asm/cputime.h | 6 ------ arch/sparc/include/asm/emergency-restart.h | 6 ------ arch/sparc/include/asm/mutex.h | 9 --------- arch/sparc/include/asm/serial.h | 6 ------ 5 files changed, 4 insertions(+), 27 deletions(-) delete mode 100644 arch/sparc/include/asm/cputime.h delete mode 100644 arch/sparc/include/asm/emergency-restart.h delete mode 100644 arch/sparc/include/asm/mutex.h delete mode 100644 arch/sparc/include/asm/serial.h diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index e26d430ce2fd..f73884bb286a 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -2,11 +2,15 @@ generic-y += clkdev.h +generic-y += cputime.h generic-y += div64.h +generic-y += emergency-restart.h generic-y += exec.h generic-y += local64.h +generic-y += mutex.h generic-y += irq_regs.h generic-y += local.h generic-y += module.h +generic-y += serial.h generic-y += trace_clock.h generic-y += word-at-a-time.h diff --git a/arch/sparc/include/asm/cputime.h b/arch/sparc/include/asm/cputime.h deleted file mode 100644 index 1a642b81e019..000000000000 --- a/arch/sparc/include/asm/cputime.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __SPARC_CPUTIME_H -#define __SPARC_CPUTIME_H - -#include - -#endif /* __SPARC_CPUTIME_H */ diff --git a/arch/sparc/include/asm/emergency-restart.h b/arch/sparc/include/asm/emergency-restart.h deleted file mode 100644 index 108d8c48e42e..000000000000 --- a/arch/sparc/include/asm/emergency-restart.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_EMERGENCY_RESTART_H -#define _ASM_EMERGENCY_RESTART_H - -#include - -#endif /* _ASM_EMERGENCY_RESTART_H */ diff --git a/arch/sparc/include/asm/mutex.h b/arch/sparc/include/asm/mutex.h deleted file mode 100644 index 458c1f7fbc18..000000000000 --- a/arch/sparc/include/asm/mutex.h +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Pull in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. (see asm-generic/mutex-xchg.h for details) - */ - -#include diff --git a/arch/sparc/include/asm/serial.h b/arch/sparc/include/asm/serial.h deleted file mode 100644 index f90d61c28059..000000000000 --- a/arch/sparc/include/asm/serial.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __SPARC_SERIAL_H -#define __SPARC_SERIAL_H - -#define BASE_BAUD ( 1843200 / 16 ) - -#endif /* __SPARC_SERIAL_H */ From 6e51f857303d02770d5f3ff8f0436487168d0230 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sat, 30 Mar 2013 11:44:24 +0000 Subject: [PATCH 024/303] bbc_i2c: fix section mismatch warning Fix following warning: WARNING: drivers/sbus/char/bbc.o(.text+0x674): Section mismatch in reference from the function bbc_i2c_probe() to the function .init.text:T.463() The function bbc_i2c_probe() references the function __init T.463(). This is often because bbc_i2c_probe lacks a __init annotation or the annotation of T.463 is wrong. bbc_i2c_probe() referenced the inlined attach_one_i2c(). As probe may be called after init drop __init annotations on all functions used by bbc_i2c_probe() The warning was seen with a sparc64 defconfig build Signed-off-by: Sam Ravnborg Signed-off-by: David S. Miller --- drivers/sbus/char/bbc_i2c.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/sbus/char/bbc_i2c.c b/drivers/sbus/char/bbc_i2c.c index 1a9d1e3ce64c..c1441ed282eb 100644 --- a/drivers/sbus/char/bbc_i2c.c +++ b/drivers/sbus/char/bbc_i2c.c @@ -282,7 +282,7 @@ static irqreturn_t bbc_i2c_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static void __init reset_one_i2c(struct bbc_i2c_bus *bp) +static void reset_one_i2c(struct bbc_i2c_bus *bp) { writeb(I2C_PCF_PIN, bp->i2c_control_regs + 0x0); writeb(bp->own, bp->i2c_control_regs + 0x1); @@ -291,7 +291,7 @@ static void __init reset_one_i2c(struct bbc_i2c_bus *bp) writeb(I2C_PCF_IDLE, bp->i2c_control_regs + 0x0); } -static struct bbc_i2c_bus * __init attach_one_i2c(struct platform_device *op, int index) +static struct bbc_i2c_bus * attach_one_i2c(struct platform_device *op, int index) { struct bbc_i2c_bus *bp; struct device_node *dp; From cbf1ef6b3345d2cc7e62407eec6a6f72a8b1346f Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sun, 31 Mar 2013 07:01:47 +0000 Subject: [PATCH 025/303] sparc: use asm-generic version of types.h In sparc headers we use the following pattern: #if defined(__sparc__) && defined(__arch64__) sparc64 specific stuff #else sparc32 specific stuff #endif In types.h this pattern was not followed and here we only checked for __sparc__ for no good reason. It was a left-over from long time ago. I checked other architectures - and most of them do not have any such checks. And all the recently merged versions uses the asm-generic version. Signed-off-by: Sam Ravnborg Signed-off-by: David S. Miller --- arch/sparc/include/asm/Kbuild | 1 + arch/sparc/include/uapi/asm/Kbuild | 1 - arch/sparc/include/uapi/asm/types.h | 17 ----------------- 3 files changed, 1 insertion(+), 18 deletions(-) delete mode 100644 arch/sparc/include/uapi/asm/types.h diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index f73884bb286a..ff18e3cfb6b1 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -13,4 +13,5 @@ generic-y += local.h generic-y += module.h generic-y += serial.h generic-y += trace_clock.h +generic-y += types.h generic-y += word-at-a-time.h diff --git a/arch/sparc/include/uapi/asm/Kbuild b/arch/sparc/include/uapi/asm/Kbuild index ce175aff71b7..b5843ee09fb5 100644 --- a/arch/sparc/include/uapi/asm/Kbuild +++ b/arch/sparc/include/uapi/asm/Kbuild @@ -44,7 +44,6 @@ header-y += swab.h header-y += termbits.h header-y += termios.h header-y += traps.h -header-y += types.h header-y += uctx.h header-y += unistd.h header-y += utrap.h diff --git a/arch/sparc/include/uapi/asm/types.h b/arch/sparc/include/uapi/asm/types.h deleted file mode 100644 index 383d156cde9c..000000000000 --- a/arch/sparc/include/uapi/asm/types.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef _SPARC_TYPES_H -#define _SPARC_TYPES_H -/* - * This file is never included by application software unless - * explicitly requested (e.g., via linux/types.h) in which case the - * application is Linux specific so (user-) name space pollution is - * not a major issue. However, for interoperability, libraries still - * need to be careful to avoid a name clashes. - */ - -#if defined(__sparc__) - -#include - -#endif /* defined(__sparc__) */ - -#endif /* defined(_SPARC_TYPES_H) */ From 8f08d6667287241f6818d35e02b223fb5df97cf1 Mon Sep 17 00:00:00 2001 From: Nigel Roberts Date: Mon, 1 Apr 2013 23:03:22 +1100 Subject: [PATCH 026/303] ARM: Kirkwood: Fix typo in the definition of ix2-200 rebuild LED In the conversion to pinctrl, an error in the pins for the rebuild LED was introduced. This patch assigns the correct pins and includes the correct name for the LED in kirkwood-iomega_ix2_200.dts. Signed-off-by: Nigel Roberts Cc: # v3.8.x Signed-off-by: Jason Cooper --- arch/arm/boot/dts/kirkwood-iomega_ix2_200.dts | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm/boot/dts/kirkwood-iomega_ix2_200.dts b/arch/arm/boot/dts/kirkwood-iomega_ix2_200.dts index 93c3afbef9ee..3694e94f6e99 100644 --- a/arch/arm/boot/dts/kirkwood-iomega_ix2_200.dts +++ b/arch/arm/boot/dts/kirkwood-iomega_ix2_200.dts @@ -96,11 +96,11 @@ marvell,function = "gpio"; }; pmx_led_rebuild_brt_ctrl_1: pmx-led-rebuild-brt-ctrl-1 { - marvell,pins = "mpp44"; + marvell,pins = "mpp46"; marvell,function = "gpio"; }; pmx_led_rebuild_brt_ctrl_2: pmx-led-rebuild-brt-ctrl-2 { - marvell,pins = "mpp45"; + marvell,pins = "mpp47"; marvell,function = "gpio"; }; @@ -157,14 +157,14 @@ gpios = <&gpio0 16 0>; linux,default-trigger = "default-on"; }; - health_led1 { + rebuild_led { + label = "status:white:rebuild_led"; + gpios = <&gpio1 4 0>; + }; + health_led { label = "status:red:health_led"; gpios = <&gpio1 5 0>; }; - health_led2 { - label = "status:white:health_led"; - gpios = <&gpio1 4 0>; - }; backup_led { label = "status:blue:backup_led"; gpios = <&gpio0 15 0>; From 2cfda637e29ce9e3df31b59f64516b2e571cc985 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 1 Apr 2013 11:48:59 -0600 Subject: [PATCH 027/303] EISA/PCI: Fix bus res reference Matthew found that 3.8.3 is having problems with an old (ancient) PCI-to-EISA bridge, the Intel 82375. It worked with the 3.2 kernel. He identified the 82375, but doesn't assign the struct resource *res pointer inside the struct eisa_root_device, and panics. pci_eisa_init() was using bus->resource[] directly instead of pci_bus_resource_n(). The bus->resource[] array is a PCI-internal implementation detail, and after commit 45ca9e97 (PCI: add helpers for building PCI bus resource lists) and commit 0efd5aab (PCI: add struct pci_host_bridge_window with CPU/bus address offset), bus->resource[] is not used for PCI root buses any more. The 82375 is a subtractive-decode PCI device, so handle it the same way we handle PCI-PCI bridges in subtractive-decode mode in pci_read_bridge_bases(). [bhelgaas: changelog] Reported-by: Matthew Whitehead Tested-by: Matthew Whitehead Signed-off-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # v3.3+ --- drivers/eisa/pci_eisa.c | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/drivers/eisa/pci_eisa.c b/drivers/eisa/pci_eisa.c index cdae207028a7..ef5c3ec87432 100644 --- a/drivers/eisa/pci_eisa.c +++ b/drivers/eisa/pci_eisa.c @@ -22,7 +22,8 @@ static struct eisa_root_device pci_eisa_root; static int __init pci_eisa_init(struct pci_dev *pdev, const struct pci_device_id *ent) { - int rc; + int rc, i; + struct resource *res, *bus_res = NULL; if ((rc = pci_enable_device (pdev))) { printk (KERN_ERR "pci_eisa : Could not enable device %s\n", @@ -30,9 +31,30 @@ static int __init pci_eisa_init(struct pci_dev *pdev, return rc; } + /* + * The Intel 82375 PCI-EISA bridge is a subtractive-decode PCI + * device, so the resources available on EISA are the same as those + * available on the 82375 bus. This works the same as a PCI-PCI + * bridge in subtractive-decode mode (see pci_read_bridge_bases()). + * We assume other PCI-EISA bridges are similar. + * + * eisa_root_register() can only deal with a single io port resource, + * so we use the first valid io port resource. + */ + pci_bus_for_each_resource(pdev->bus, res, i) + if (res && (res->flags & IORESOURCE_IO)) { + bus_res = res; + break; + } + + if (!bus_res) { + dev_err(&pdev->dev, "No resources available\n"); + return -1; + } + pci_eisa_root.dev = &pdev->dev; - pci_eisa_root.res = pdev->bus->resource[0]; - pci_eisa_root.bus_base_addr = pdev->bus->resource[0]->start; + pci_eisa_root.res = bus_res; + pci_eisa_root.bus_base_addr = bus_res->start; pci_eisa_root.slots = EISA_MAX_SLOTS; pci_eisa_root.dma_mask = pdev->dma_mask; dev_set_drvdata(pci_eisa_root.dev, &pci_eisa_root); From c5fb301ae83bec6892e54984e6ec765c47df8e10 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 27 Mar 2013 21:28:05 -0700 Subject: [PATCH 028/303] EISA/PCI: Init EISA early, before PNP Matthew reported kernels fail the pci_eisa probe and are later successful with the virtual_eisa_root_init force probe without slot0. The reason for that is: PNP probing is before pci_eisa_init gets called as pci_eisa_init is called via pci_driver. pnp 00:0f has 0xc80 - 0xc84 reserved. [ 9.700409] pnp 00:0f: [io 0x0c80-0x0c84] so eisa_probe will fail from pci_eisa_init ==>eisa_root_register ==>eisa_probe path. as force_probe is not set in pci_eisa_root, it will bail early when slot0 is not probed and initialized. Try to use subsys_initcall_sync instead, and will keep following sequence: pci_subsys_init pci_eisa_init_early pnpacpi_init/isapnp_init After this patch EISA can be initialized properly, and PNP overlapping resource will not be reserved. [ 10.104434] system 00:0f: [io 0x0c80-0x0c84] could not be reserved Reported-by: Matthew Whitehead Tested-by: Matthew Whitehead Signed-off-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org --- drivers/eisa/pci_eisa.c | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/drivers/eisa/pci_eisa.c b/drivers/eisa/pci_eisa.c index ef5c3ec87432..6c3fca97d346 100644 --- a/drivers/eisa/pci_eisa.c +++ b/drivers/eisa/pci_eisa.c @@ -19,8 +19,7 @@ /* There is only *one* pci_eisa device per machine, right ? */ static struct eisa_root_device pci_eisa_root; -static int __init pci_eisa_init(struct pci_dev *pdev, - const struct pci_device_id *ent) +static int __init pci_eisa_init(struct pci_dev *pdev) { int rc, i; struct resource *res, *bus_res = NULL; @@ -67,22 +66,26 @@ static int __init pci_eisa_init(struct pci_dev *pdev, return 0; } -static struct pci_device_id pci_eisa_pci_tbl[] = { - { PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, - PCI_CLASS_BRIDGE_EISA << 8, 0xffff00, 0 }, - { 0, } -}; - -static struct pci_driver __refdata pci_eisa_driver = { - .name = "pci_eisa", - .id_table = pci_eisa_pci_tbl, - .probe = pci_eisa_init, -}; - -static int __init pci_eisa_init_module (void) +/* + * We have to call pci_eisa_init_early() before pnpacpi_init()/isapnp_init(). + * Otherwise pnp resource will get enabled early and could prevent eisa + * to be initialized. + * Also need to make sure pci_eisa_init_early() is called after + * x86/pci_subsys_init(). + * So need to use subsys_initcall_sync with it. + */ +static int __init pci_eisa_init_early(void) { - return pci_register_driver (&pci_eisa_driver); -} + struct pci_dev *dev = NULL; + int ret; -device_initcall(pci_eisa_init_module); -MODULE_DEVICE_TABLE(pci, pci_eisa_pci_tbl); + for_each_pci_dev(dev) + if ((dev->class >> 8) == PCI_CLASS_BRIDGE_EISA) { + ret = pci_eisa_init(dev); + if (ret) + return ret; + } + + return 0; +} +subsys_initcall_sync(pci_eisa_init_early); From 121cdf08cc854bda4892d2c755e32ef424772ac2 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Tue, 26 Mar 2013 17:25:55 -0400 Subject: [PATCH 029/303] nouveau: Attempt to use platform-provided ROM image Some platforms only provide their PCI ROM via a platform-specific interface. Fall back to attempting that if all other sources fail. Signed-off-by: Matthew Garrett Signed-off-by: Bjorn Helgaas --- drivers/gpu/drm/nouveau/core/subdev/bios/base.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/gpu/drm/nouveau/core/subdev/bios/base.c b/drivers/gpu/drm/nouveau/core/subdev/bios/base.c index e816f06637a7..0e2c1a4f1659 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/bios/base.c +++ b/drivers/gpu/drm/nouveau/core/subdev/bios/base.c @@ -248,6 +248,22 @@ nouveau_bios_shadow_pci(struct nouveau_bios *bios) } } +static void +nouveau_bios_shadow_platform(struct nouveau_bios *bios) +{ + struct pci_dev *pdev = nv_device(bios)->pdev; + size_t size; + + void __iomem *rom = pci_platform_rom(pdev, &size); + if (rom && size) { + bios->data = kmalloc(size, GFP_KERNEL); + if (bios->data) { + memcpy_fromio(bios->data, rom, size); + bios->size = size; + } + } +} + static int nouveau_bios_score(struct nouveau_bios *bios, const bool writeable) { @@ -288,6 +304,7 @@ nouveau_bios_shadow(struct nouveau_bios *bios) { "PROM", nouveau_bios_shadow_prom, false, 0, 0, NULL }, { "ACPI", nouveau_bios_shadow_acpi, true, 0, 0, NULL }, { "PCIROM", nouveau_bios_shadow_pci, true, 0, 0, NULL }, + { "PLATFORM", nouveau_bios_shadow_platform, true, 0, 0, NULL }, {} }; struct methods *mthd, *best; From 06a08570085b3b20c45f45dc66dc46851ecbcb5b Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Tue, 26 Mar 2013 17:25:56 -0400 Subject: [PATCH 030/303] radeon: Attempt to use platform-provided ROM image Some platforms only provide their PCI ROM via a platform-specific interface. Fall back to attempting that if all other sources fail. Signed-off-by: Matthew Garrett Signed-off-by: Bjorn Helgaas --- drivers/gpu/drm/radeon/radeon_bios.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_bios.c b/drivers/gpu/drm/radeon/radeon_bios.c index b8015913d382..fa3c56fba294 100644 --- a/drivers/gpu/drm/radeon/radeon_bios.c +++ b/drivers/gpu/drm/radeon/radeon_bios.c @@ -99,6 +99,29 @@ static bool radeon_read_bios(struct radeon_device *rdev) return true; } +static bool radeon_read_platform_bios(struct radeon_device *rdev) +{ + uint8_t __iomem *bios; + size_t size; + + rdev->bios = NULL; + + bios = pci_platform_rom(rdev->pdev, &size); + if (!bios) { + return false; + } + + if (size == 0 || bios[0] != 0x55 || bios[1] != 0xaa) { + return false; + } + rdev->bios = kmemdup(bios, size, GFP_KERNEL); + if (rdev->bios == NULL) { + return false; + } + + return true; +} + #ifdef CONFIG_ACPI /* ATRM is used to get the BIOS on the discrete cards in * dual-gpu systems. @@ -620,6 +643,9 @@ bool radeon_get_bios(struct radeon_device *rdev) if (r == false) { r = radeon_read_disabled_bios(rdev); } + if (r == false) { + r = radeon_read_platform_bios(rdev); + } if (r == false || rdev->bios == NULL) { DRM_ERROR("Unable to locate a BIOS ROM\n"); rdev->bios = NULL; From d3dde52209ab571e4e2ec26c66f85ad1355f7475 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Thu, 28 Mar 2013 21:54:03 +0200 Subject: [PATCH 031/303] crypto: gcm - fix assumption that assoc has one segment rfc4543(gcm(*)) code for GMAC assumes that assoc scatterlist always contains only one segment and only makes use of this first segment. However ipsec passes assoc with three segments when using 'extended sequence number' thus in this case rfc4543(gcm(*)) fails to function correctly. Patch fixes this issue. Reported-by: Chaoxing Lin Tested-by: Chaoxing Lin Cc: stable@vger.kernel.org Signed-off-by: Jussi Kivilinna Signed-off-by: Herbert Xu --- crypto/gcm.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/crypto/gcm.c b/crypto/gcm.c index 137ad1ec5438..13ccbda34ff9 100644 --- a/crypto/gcm.c +++ b/crypto/gcm.c @@ -44,6 +44,7 @@ struct crypto_rfc4543_ctx { struct crypto_rfc4543_req_ctx { u8 auth_tag[16]; + u8 assocbuf[32]; struct scatterlist cipher[1]; struct scatterlist payload[2]; struct scatterlist assoc[2]; @@ -1133,9 +1134,19 @@ static struct aead_request *crypto_rfc4543_crypt(struct aead_request *req, scatterwalk_crypto_chain(payload, dst, vdst == req->iv + 8, 2); assoclen += 8 + req->cryptlen - (enc ? 0 : authsize); - sg_init_table(assoc, 2); - sg_set_page(assoc, sg_page(req->assoc), req->assoc->length, - req->assoc->offset); + if (req->assoc->length == req->assoclen) { + sg_init_table(assoc, 2); + sg_set_page(assoc, sg_page(req->assoc), req->assoc->length, + req->assoc->offset); + } else { + BUG_ON(req->assoclen > sizeof(rctx->assocbuf)); + + scatterwalk_map_and_copy(rctx->assocbuf, req->assoc, 0, + req->assoclen, 0); + + sg_init_table(assoc, 2); + sg_set_buf(assoc, rctx->assocbuf, req->assoclen); + } scatterwalk_crypto_chain(assoc, payload, 0, 2); aead_request_set_tfm(subreq, ctx->child); From 0b94c57717054811dd836d582585d3fb4856b43f Mon Sep 17 00:00:00 2001 From: Padmavathi Venna Date: Tue, 5 Mar 2013 14:55:31 +0530 Subject: [PATCH 032/303] DMA: PL330: Add check if device tree compatible This patch register the dma controller with generic dma helpers only in DT case. This also adds some extra error handling in the driver. Signed-off-by: Padmavathi Venna Reported-by: Sachin Kamat Signed-off-by: Vinod Koul --- drivers/dma/pl330.c | 38 +++++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 718153122759..5dbc5946c4c3 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -2882,7 +2882,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id) { struct dma_pl330_platdata *pdat; struct dma_pl330_dmac *pdmac; - struct dma_pl330_chan *pch; + struct dma_pl330_chan *pch, *_p; struct pl330_info *pi; struct dma_device *pd; struct resource *res; @@ -2984,7 +2984,16 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id) ret = dma_async_device_register(pd); if (ret) { dev_err(&adev->dev, "unable to register DMAC\n"); - goto probe_err2; + goto probe_err3; + } + + if (adev->dev.of_node) { + ret = of_dma_controller_register(adev->dev.of_node, + of_dma_pl330_xlate, pdmac); + if (ret) { + dev_err(&adev->dev, + "unable to register DMA to the generic DT DMA helpers\n"); + } } dev_info(&adev->dev, @@ -2995,16 +3004,21 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id) pi->pcfg.data_bus_width / 8, pi->pcfg.num_chan, pi->pcfg.num_peri, pi->pcfg.num_events); - ret = of_dma_controller_register(adev->dev.of_node, - of_dma_pl330_xlate, pdmac); - if (ret) { - dev_err(&adev->dev, - "unable to register DMA to the generic DT DMA helpers\n"); - goto probe_err2; - } - return 0; +probe_err3: + amba_set_drvdata(adev, NULL); + /* Idle the DMAC */ + list_for_each_entry_safe(pch, _p, &pdmac->ddma.channels, + chan.device_node) { + + /* Remove the channel */ + list_del(&pch->chan.device_node); + + /* Flush the channel */ + pl330_control(&pch->chan, DMA_TERMINATE_ALL, 0); + pl330_free_chan_resources(&pch->chan); + } probe_err2: pl330_del(pi); probe_err1: @@ -3023,8 +3037,10 @@ static int pl330_remove(struct amba_device *adev) if (!pdmac) return 0; - of_dma_controller_free(adev->dev.of_node); + if (adev->dev.of_node) + of_dma_controller_free(adev->dev.of_node); + dma_async_device_unregister(&pdmac->ddma); amba_set_drvdata(adev, NULL); /* Idle the DMAC */ From bee980d9e9642e96351fa3ca9077b853ecf62f57 Mon Sep 17 00:00:00 2001 From: Keir Fraser Date: Thu, 28 Mar 2013 10:03:36 -0400 Subject: [PATCH 033/303] xen/events: Handle VIRQ_TIMER before any other hardirq in event loop. This avoids any other hardirq handler seeing a very stale jiffies value immediately after wakeup from a long idle period. The one observable symptom of this was a USB keyboard, with software keyboard repeat, which would always repeat a key immediately that it was pressed. This is due to the key press waking the guest, the key handler immediately runs, sees an old jiffies value, and then that jiffies value significantly updated, before the key is unpressed. Reviewed-by: David Vrabel Signed-off-by: Keir Fraser Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/events.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/xen/events.c b/drivers/xen/events.c index aa85881d17b2..2647ad8e1f19 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -1316,7 +1316,7 @@ static void __xen_evtchn_do_upcall(void) { int start_word_idx, start_bit_idx; int word_idx, bit_idx; - int i; + int i, irq; int cpu = get_cpu(); struct shared_info *s = HYPERVISOR_shared_info; struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); @@ -1324,6 +1324,8 @@ static void __xen_evtchn_do_upcall(void) do { xen_ulong_t pending_words; + xen_ulong_t pending_bits; + struct irq_desc *desc; vcpu_info->evtchn_upcall_pending = 0; @@ -1335,6 +1337,17 @@ static void __xen_evtchn_do_upcall(void) * selector flag. xchg_xen_ulong must contain an * appropriate barrier. */ + if ((irq = per_cpu(virq_to_irq, cpu)[VIRQ_TIMER]) != -1) { + int evtchn = evtchn_from_irq(irq); + word_idx = evtchn / BITS_PER_LONG; + pending_bits = evtchn % BITS_PER_LONG; + if (active_evtchns(cpu, s, word_idx) & (1ULL << pending_bits)) { + desc = irq_to_desc(irq); + if (desc) + generic_handle_irq_desc(irq, desc); + } + } + pending_words = xchg_xen_ulong(&vcpu_info->evtchn_pending_sel, 0); start_word_idx = __this_cpu_read(current_word_idx); @@ -1343,7 +1356,6 @@ static void __xen_evtchn_do_upcall(void) word_idx = start_word_idx; for (i = 0; pending_words != 0; i++) { - xen_ulong_t pending_bits; xen_ulong_t words; words = MASK_LSBS(pending_words, word_idx); @@ -1372,8 +1384,7 @@ static void __xen_evtchn_do_upcall(void) do { xen_ulong_t bits; - int port, irq; - struct irq_desc *desc; + int port; bits = MASK_LSBS(pending_bits, bit_idx); From b22227944b8fe92b19150b4c36421e37979d9a16 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 29 Mar 2013 10:20:56 -0400 Subject: [PATCH 034/303] xen/mmu: On early bootup, flush the TLB when changing RO->RW bits Xen provided pagetables. Occassionaly on a DL380 G4 the guest would crash quite early with this: (XEN) d244:v0: unhandled page fault (ec=0003) (XEN) Pagetable walk from ffffffff84dc7000: (XEN) L4[0x1ff] = 00000000c3f18067 0000000000001789 (XEN) L3[0x1fe] = 00000000c3f14067 000000000000178d (XEN) L2[0x026] = 00000000dc8b2067 0000000000004def (XEN) L1[0x1c7] = 00100000dc8da067 0000000000004dc7 (XEN) domain_crash_sync called from entry.S (XEN) Domain 244 (vcpu#0) crashed on cpu#3: (XEN) ----[ Xen-4.1.3OVM x86_64 debug=n Not tainted ]---- (XEN) CPU: 3 (XEN) RIP: e033:[] (XEN) RFLAGS: 0000000000000216 EM: 1 CONTEXT: pv guest (XEN) rax: 0000000000000000 rbx: ffffffff81785f88 rcx: 000000000000003f (XEN) rdx: 0000000000000000 rsi: 00000000dc8da063 rdi: ffffffff84dc7000 The offending code shows it to be a loop writting the value zero (%rax) in the %rdi (the L4 provided by Xen) register: 0: 44 00 00 add %r8b,(%rax) 3: 31 c0 xor %eax,%eax 5: b9 40 00 00 00 mov $0x40,%ecx a: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1) 11: 00 00 13: ff c9 dec %ecx 15:* 48 89 07 mov %rax,(%rdi) <-- trapping instruction 18: 48 89 47 08 mov %rax,0x8(%rdi) 1c: 48 89 47 10 mov %rax,0x10(%rdi) which fails. xen_setup_kernel_pagetable recycles some of the Xen's page-table entries when it has switched over to its Linux page-tables. Right before try to clear the page, we make a hypercall to change it from _RO to _RW and that works (otherwise we would hit an BUG()). And the _RW flag is set for that page: (XEN) L1[0x1c7] = 001000004885f067 0000000000004dc7 The error code is 3, so PFEC_page_present and PFEC_write_access, so page is present (correct), and we tried to write to the page, but a violation occurred. The one theory is that the the page entries in hardware (which are cached) are not up to date with what we just set. Especially as we have just done an CR3 write and flushed the multicalls. This patch does solve the problem by flusing out the TLB page entry after changing it from _RO to _RW and we don't hit this issue anymore. Fixed-Oracle-Bug: 16243091 [ON OCCASIONS VM START GOES INTO 'CRASH' STATE: CLEAR_PAGE+0X12 ON HP DL380 G4] Reported-and-Tested-by: Saar Maoz Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/mmu.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 6afbb2ca9a0a..a4ea92477e01 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1748,14 +1748,18 @@ static void *m2v(phys_addr_t maddr) } /* Set the page permissions on an identity-mapped pages */ -static void set_page_prot(void *addr, pgprot_t prot) +static void set_page_prot_flags(void *addr, pgprot_t prot, unsigned long flags) { unsigned long pfn = __pa(addr) >> PAGE_SHIFT; pte_t pte = pfn_pte(pfn, prot); - if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0)) + if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags)) BUG(); } +static void set_page_prot(void *addr, pgprot_t prot) +{ + return set_page_prot_flags(addr, prot, UVMF_NONE); +} #ifdef CONFIG_X86_32 static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) { @@ -1839,12 +1843,12 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end, unsigned long addr) { if (*pt_base == PFN_DOWN(__pa(addr))) { - set_page_prot((void *)addr, PAGE_KERNEL); + set_page_prot_flags((void *)addr, PAGE_KERNEL, UVMF_INVLPG); clear_page((void *)addr); (*pt_base)++; } if (*pt_end == PFN_DOWN(__pa(addr))) { - set_page_prot((void *)addr, PAGE_KERNEL); + set_page_prot_flags((void *)addr, PAGE_KERNEL, UVMF_INVLPG); clear_page((void *)addr); (*pt_end)--; } From b62d7946b2dd04e07734313a450cb0a8b440f206 Mon Sep 17 00:00:00 2001 From: Roland Stigge Date: Tue, 2 Apr 2013 19:37:11 +0200 Subject: [PATCH 035/303] MAINTAINERS: Add maintainer for LPC32xx This patch adds Roland Stigge as maintainer for NXP LPC32xx. Signed-off-by: Roland Stigge Signed-off-by: Olof Johansson --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 4cf5fd334a06..d596a3f6345a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4935,6 +4935,12 @@ W: logfs.org S: Maintained F: fs/logfs/ +LPC32XX MACHINE SUPPORT +M: Roland Stigge +L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) +S: Maintained +F: arch/arm/mach-lpc32xx/ + LSILOGIC MPT FUSION DRIVERS (FC/SAS/SPI) M: Nagalakshmi Nandigama M: Sreekanth Reddy From 8b4b9f27e57584f3d90e0bb84cf800ad81cfe3a1 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 15 Feb 2013 12:21:43 -0500 Subject: [PATCH 036/303] x86: remove the x32 syscall bitmask from syscall_get_nr() Commit fca460f95e928bae373daa8295877b6905bc62b8 simplified the x32 implementation by creating a syscall bitmask, equal to 0x40000000, that could be applied to x32 syscalls such that the masked syscall number would be the same as a x86_64 syscall. While that patch was a nice way to simplify the code, it went a bit too far by adding the mask to syscall_get_nr(); returning the masked syscall numbers can cause confusion with callers that expect syscall numbers matching the x32 ABI, e.g. unmasked syscall numbers. This patch fixes this by simply removing the mask from syscall_get_nr() while preserving the other changes from the original commit. While there are several syscall_get_nr() callers in the kernel, most simply check that the syscall number is greater than zero, in this case this patch will have no effect. Of those remaining callers, they appear to be few, seccomp and ftrace, and from my testing of seccomp without this patch the original commit definitely breaks things; the seccomp filter does not correctly filter the syscalls due to the difference in syscall numbers in the BPF filter and the value from syscall_get_nr(). Applying this patch restores the seccomp BPF filter functionality on x32. I've tested this patch with the seccomp BPF filters as well as ftrace and everything looks reasonable to me; needless to say general usage seemed fine as well. Signed-off-by: Paul Moore Link: http://lkml.kernel.org/r/20130215172143.12549.10292.stgit@localhost Cc: Cc: Will Drewry Cc: H. Peter Anvin Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/syscall.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 1ace47b62592..2e188d68397c 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -29,13 +29,13 @@ extern const unsigned long sys_call_table[]; */ static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { - return regs->orig_ax & __SYSCALL_MASK; + return regs->orig_ax; } static inline void syscall_rollback(struct task_struct *task, struct pt_regs *regs) { - regs->ax = regs->orig_ax & __SYSCALL_MASK; + regs->ax = regs->orig_ax; } static inline long syscall_get_error(struct task_struct *task, From af0d9187f66db2711f94dc20d5a06ec9ba5845b3 Mon Sep 17 00:00:00 2001 From: Asias He Date: Tue, 2 Apr 2013 23:31:37 +0800 Subject: [PATCH 037/303] tcm_vhost: Use ACCESS_ONCE for vs->vs_tpg[target] access In vhost_scsi_handle_vq: tv_tpg = vs->vs_tpg[target]; if (!tv_tpg) { .... return } tv_cmd = vhost_scsi_allocate_cmd(tv_tpg, &v_req, 1) vs->vs_tpg[target] might change after the NULL check and 2) the above line might access tv_tpg from vs->vs_tpg[target]. To prevent 2), use ACCESS_ONCE. Thanks mst for catching this up! Signed-off-by: Asias He Acked-by: Michael S. Tsirkin Signed-off-by: Nicholas Bellinger --- drivers/vhost/tcm_vhost.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c index 2968b4934659..dd9614eb2577 100644 --- a/drivers/vhost/tcm_vhost.c +++ b/drivers/vhost/tcm_vhost.c @@ -661,7 +661,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, /* Extract the tpgt */ target = v_req.lun[1]; - tv_tpg = vs->vs_tpg[target]; + tv_tpg = ACCESS_ONCE(vs->vs_tpg[target]); /* Target does not exist, fail the request */ if (unlikely(!tv_tpg)) { From b8178f130e25c1bdac1c33e0996f1ff6e20ec08e Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 1 Apr 2013 15:47:39 -0600 Subject: [PATCH 038/303] Revert "PCI/ACPI: Request _OSC control before scanning PCI root bus" This reverts commit 8c33f51df406e1a1f7fa4e9b244845b7ebd61fa6. Conflicts: drivers/acpi/pci_root.c This commit broke some pre-1.1 PCIe devices by leaving them with ASPM enabled. Previously, we had disabled ASPM on these devices because many of them don't implement it correctly (per 149e1637). Requesting _OSC control early means that aspm_disabled may be set before we scan the PCI bus and configure link ASPM state. But the ASPM configuration currently skips the check for pre-PCIe 1.1 devices when aspm_disabled is set, like this: acpi_pci_root_add acpi_pci_osc_support if (flags != base_flags) pcie_no_aspm aspm_disabled = 1 pci_acpi_scan_root ... pcie_aspm_init_link_state pcie_aspm_sanity_check if (!aspm_disabled) /* check for pre-PCIe 1.1 device */ Therefore, setting aspm_disabled early means that we leave ASPM enabled on these pre-PCIe 1.1 devices, which is a regression for some devices. The best fix would be to clean up the ASPM init so we can evaluate _OSC before scanning the bug (that way boot-time and hot-add discovery will work the same), but that requires significant rework. For now, we'll just revert the _OSC change as the lowest-risk fix. Reference: https://bugzilla.kernel.org/show_bug.cgi?id=55211 Signed-off-by: Bjorn Helgaas Acked-by: Rafael J. Wysocki Acked-by: Yinghai Lu CC: stable@vger.kernel.org # v3.8+ --- drivers/acpi/pci_root.c | 76 ++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 39 deletions(-) diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index 0ac546d5e53f..c740364d4abe 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -415,7 +415,6 @@ static int acpi_pci_root_add(struct acpi_device *device, struct acpi_pci_root *root; struct acpi_pci_driver *driver; u32 flags, base_flags; - bool is_osc_granted = false; root = kzalloc(sizeof(struct acpi_pci_root), GFP_KERNEL); if (!root) @@ -476,6 +475,30 @@ static int acpi_pci_root_add(struct acpi_device *device, flags = base_flags = OSC_PCI_SEGMENT_GROUPS_SUPPORT; acpi_pci_osc_support(root, flags); + /* + * TBD: Need PCI interface for enumeration/configuration of roots. + */ + + mutex_lock(&acpi_pci_root_lock); + list_add_tail(&root->node, &acpi_pci_roots); + mutex_unlock(&acpi_pci_root_lock); + + /* + * Scan the Root Bridge + * -------------------- + * Must do this prior to any attempt to bind the root device, as the + * PCI namespace does not get created until this call is made (and + * thus the root bridge's pci_dev does not exist). + */ + root->bus = pci_acpi_scan_root(root); + if (!root->bus) { + printk(KERN_ERR PREFIX + "Bus %04x:%02x not present in PCI namespace\n", + root->segment, (unsigned int)root->secondary.start); + result = -ENODEV; + goto out_del_root; + } + /* Indicate support for various _OSC capabilities. */ if (pci_ext_cfg_avail()) flags |= OSC_EXT_PCI_CONFIG_SUPPORT; @@ -494,6 +517,7 @@ static int acpi_pci_root_add(struct acpi_device *device, flags = base_flags; } } + if (!pcie_ports_disabled && (flags & ACPI_PCIE_REQ_SUPPORT) == ACPI_PCIE_REQ_SUPPORT) { flags = OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL @@ -514,54 +538,28 @@ static int acpi_pci_root_add(struct acpi_device *device, status = acpi_pci_osc_control_set(device->handle, &flags, OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL); if (ACPI_SUCCESS(status)) { - is_osc_granted = true; dev_info(&device->dev, "ACPI _OSC control (0x%02x) granted\n", flags); + if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_ASPM) { + /* + * We have ASPM control, but the FADT indicates + * that it's unsupported. Clear it. + */ + pcie_clear_aspm(root->bus); + } } else { - is_osc_granted = false; dev_info(&device->dev, "ACPI _OSC request failed (%s), " "returned control mask: 0x%02x\n", acpi_format_exception(status), flags); + pr_info("ACPI _OSC control for PCIe not granted, " + "disabling ASPM\n"); + pcie_no_aspm(); } } else { dev_info(&device->dev, - "Unable to request _OSC control " - "(_OSC support mask: 0x%02x)\n", flags); - } - - /* - * TBD: Need PCI interface for enumeration/configuration of roots. - */ - - mutex_lock(&acpi_pci_root_lock); - list_add_tail(&root->node, &acpi_pci_roots); - mutex_unlock(&acpi_pci_root_lock); - - /* - * Scan the Root Bridge - * -------------------- - * Must do this prior to any attempt to bind the root device, as the - * PCI namespace does not get created until this call is made (and - * thus the root bridge's pci_dev does not exist). - */ - root->bus = pci_acpi_scan_root(root); - if (!root->bus) { - printk(KERN_ERR PREFIX - "Bus %04x:%02x not present in PCI namespace\n", - root->segment, (unsigned int)root->secondary.start); - result = -ENODEV; - goto out_del_root; - } - - /* ASPM setting */ - if (is_osc_granted) { - if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_ASPM) - pcie_clear_aspm(root->bus); - } else { - pr_info("ACPI _OSC control for PCIe not granted, " - "disabling ASPM\n"); - pcie_no_aspm(); + "Unable to request _OSC control " + "(_OSC support mask: 0x%02x)\n", flags); } pci_acpi_add_bus_pm_notifier(device, root->bus); From 6e0eda3c389887168455b92b6f7b48f552227067 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 14 Mar 2013 18:49:37 +0400 Subject: [PATCH 039/303] PCI: Don't try to disable Bus Master on disconnected PCI devices This is a fix for commit 7897e60227 ("PCI: Disable Bus Master unconditionally in pci_device_shutdown()"). Vivek reported that with this commit, kexec failed because none of his SATA disks came up. A ->shutdown() callback might put the device in D3cold, which means config space is no longer available. [bhelgaas: changelog] Link: https://lkml.org/lkml/2013/3/12/529 Reported-and-Tested-by: Vivek Goyal Signed-off-by: Konstantin Khlebnikov Signed-off-by: Bjorn Helgaas Cc: Rafael J. Wysocki --- drivers/pci/pci-driver.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 1fa1e482a999..79277fb36c6b 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -390,9 +390,10 @@ static void pci_device_shutdown(struct device *dev) /* * Turn off Bus Master bit on the device to tell it to not - * continue to do DMA + * continue to do DMA. Don't touch devices in D3cold or unknown states. */ - pci_clear_master(pci_dev); + if (pci_dev->current_state <= PCI_D3hot) + pci_clear_master(pci_dev); } #ifdef CONFIG_PM From 55985e15b80beb339d3e332f00a26003deff14e6 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Tue, 2 Apr 2013 01:29:29 +0200 Subject: [PATCH 040/303] ARM: mxs: Slow down the I2C clock speed Slow down the I2C clock speed on M28 and SPS1 as it turns out the I2C block in i.MX28 can not operate stable enough with the bus running at 400kHz. Note that the driver used by Freescale runs the bus at 250kHz when 400kHz speed is selected, but the mainline Linux kernel runs the bus at actual 400kHz and that's where it is occasionally unstable. Play safe and run the bus at 100kHz. Signed-off-by: Marek Vasut Cc: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx28-m28evk.dts | 1 - arch/arm/boot/dts/imx28-sps1.dts | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/arm/boot/dts/imx28-m28evk.dts b/arch/arm/boot/dts/imx28-m28evk.dts index 6ce3d17c3a29..fd36e1cca104 100644 --- a/arch/arm/boot/dts/imx28-m28evk.dts +++ b/arch/arm/boot/dts/imx28-m28evk.dts @@ -152,7 +152,6 @@ i2c0: i2c@80058000 { pinctrl-names = "default"; pinctrl-0 = <&i2c0_pins_a>; - clock-frequency = <400000>; status = "okay"; sgtl5000: codec@0a { diff --git a/arch/arm/boot/dts/imx28-sps1.dts b/arch/arm/boot/dts/imx28-sps1.dts index e6cde8aa7fff..6c6a5442800a 100644 --- a/arch/arm/boot/dts/imx28-sps1.dts +++ b/arch/arm/boot/dts/imx28-sps1.dts @@ -70,7 +70,6 @@ i2c0: i2c@80058000 { pinctrl-names = "default"; pinctrl-0 = <&i2c0_pins_a>; - clock-frequency = <400000>; status = "okay"; rtc: rtc@51 { From a9b977ecd3dbc5d4f0fe0b3d5c66d284859b1f2a Mon Sep 17 00:00:00 2001 From: Prathyush K Date: Tue, 2 Apr 2013 16:53:01 +0530 Subject: [PATCH 041/303] ASoC: Samsung: return error if drvdata is not set This patch fixes a possible crash in case drvdata for the secondary device is not set. Signed-off-by: Prathyush K Signed-off-by: Padmavathi Venna Signed-off-by: Mark Brown --- sound/soc/samsung/i2s.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/soc/samsung/i2s.c b/sound/soc/samsung/i2s.c index d7231e336a7c..f1fc06419560 100644 --- a/sound/soc/samsung/i2s.c +++ b/sound/soc/samsung/i2s.c @@ -1107,6 +1107,10 @@ static int samsung_i2s_probe(struct platform_device *pdev) if (samsung_dai_type == TYPE_SEC) { sec_dai = dev_get_drvdata(&pdev->dev); + if (!sec_dai) { + dev_err(&pdev->dev, "Unable to get drvdata\n"); + return -EFAULT; + } snd_soc_register_dai(&sec_dai->pdev->dev, &sec_dai->i2s_dai_drv); asoc_dma_platform_register(&pdev->dev); From c6f9b1eb0e5df468891eff17f981b76c86f95f3a Mon Sep 17 00:00:00 2001 From: Prathyush K Date: Tue, 2 Apr 2013 16:53:02 +0530 Subject: [PATCH 042/303] ASoC: Samsung: set drvdata before adding secondary device Currently, a new platform device is created for secondary device by calling platform_device_register_resndata and then the drvdata is set for this device. The following patch has been added to driver core: "driver core: fix possible missing of device probe". This results in the added device getting probed immediately but the drvdata for the secondary device is not yet set. This patch removes the platform_device_register_resndata call and instead calls platform_device_alloc, platform_set_drvdata and platform_device_add which fixes the above issue. Signed-off-by: Prathyush K Signed-off-by: Padmavathi Venna Signed-off-by: Mark Brown --- sound/soc/samsung/i2s.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/sound/soc/samsung/i2s.c b/sound/soc/samsung/i2s.c index f1fc06419560..6bbeb0bf1a73 100644 --- a/sound/soc/samsung/i2s.c +++ b/sound/soc/samsung/i2s.c @@ -972,6 +972,7 @@ static const struct snd_soc_dai_ops samsung_i2s_dai_ops = { static struct i2s_dai *i2s_alloc_dai(struct platform_device *pdev, bool sec) { struct i2s_dai *i2s; + int ret; i2s = devm_kzalloc(&pdev->dev, sizeof(struct i2s_dai), GFP_KERNEL); if (i2s == NULL) @@ -996,15 +997,17 @@ static struct i2s_dai *i2s_alloc_dai(struct platform_device *pdev, bool sec) i2s->i2s_dai_drv.capture.channels_max = 2; i2s->i2s_dai_drv.capture.rates = SAMSUNG_I2S_RATES; i2s->i2s_dai_drv.capture.formats = SAMSUNG_I2S_FMTS; + dev_set_drvdata(&i2s->pdev->dev, i2s); } else { /* Create a new platform_device for Secondary */ - i2s->pdev = platform_device_register_resndata(NULL, - "samsung-i2s-sec", -1, NULL, 0, NULL, 0); + i2s->pdev = platform_device_alloc("samsung-i2s-sec", -1); if (IS_ERR(i2s->pdev)) return NULL; - } - /* Pre-assign snd_soc_dai_set_drvdata */ - dev_set_drvdata(&i2s->pdev->dev, i2s); + platform_set_drvdata(i2s->pdev, i2s); + ret = platform_device_add(i2s->pdev); + if (ret < 0) + return NULL; + } return i2s; } From 5aa995e83ac7727b7705431e6eb2b317c59b95ba Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Wed, 3 Apr 2013 11:00:01 +0200 Subject: [PATCH 043/303] ASoC: tegra: Don't claim to support PCM pause and resume The tegra dmaengine driver does not support pausing and resuming a DMA stream. The tegra PCM driver still claims to support pause and resume though and implements them by stopping and restarting the stream. This is not what an application using pause/resume would expect. Usually applications have support for working around PCMs which do not support suspend and resume, so don't set the SNDRV_PCM_INFO_PAUSE and SNDRV_PCM_INFO_RESUME flags for the tegra PCM and use the default snd_dmaengine_pcm_trigger callback. Signed-off-by: Lars-Peter Clausen Reviewed-by: Stephen Warren Tested-by: Stephen Warren Signed-off-by: Mark Brown --- sound/soc/tegra/tegra_pcm.c | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/sound/soc/tegra/tegra_pcm.c b/sound/soc/tegra/tegra_pcm.c index c925ab0adeb6..5e2c55c5b255 100644 --- a/sound/soc/tegra/tegra_pcm.c +++ b/sound/soc/tegra/tegra_pcm.c @@ -43,8 +43,6 @@ static const struct snd_pcm_hardware tegra_pcm_hardware = { .info = SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_MMAP_VALID | - SNDRV_PCM_INFO_PAUSE | - SNDRV_PCM_INFO_RESUME | SNDRV_PCM_INFO_INTERLEAVED, .formats = SNDRV_PCM_FMTBIT_S16_LE, .channels_min = 2, @@ -127,26 +125,6 @@ static int tegra_pcm_hw_free(struct snd_pcm_substream *substream) return 0; } -static int tegra_pcm_trigger(struct snd_pcm_substream *substream, int cmd) -{ - switch (cmd) { - case SNDRV_PCM_TRIGGER_START: - case SNDRV_PCM_TRIGGER_RESUME: - case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: - return snd_dmaengine_pcm_trigger(substream, - SNDRV_PCM_TRIGGER_START); - - case SNDRV_PCM_TRIGGER_STOP: - case SNDRV_PCM_TRIGGER_SUSPEND: - case SNDRV_PCM_TRIGGER_PAUSE_PUSH: - return snd_dmaengine_pcm_trigger(substream, - SNDRV_PCM_TRIGGER_STOP); - default: - return -EINVAL; - } - return 0; -} - static int tegra_pcm_mmap(struct snd_pcm_substream *substream, struct vm_area_struct *vma) { @@ -164,7 +142,7 @@ static struct snd_pcm_ops tegra_pcm_ops = { .ioctl = snd_pcm_lib_ioctl, .hw_params = tegra_pcm_hw_params, .hw_free = tegra_pcm_hw_free, - .trigger = tegra_pcm_trigger, + .trigger = snd_dmaengine_pcm_trigger, .pointer = snd_dmaengine_pcm_pointer, .mmap = tegra_pcm_mmap, }; From 5448bd8ca139bf8f82b93e10acbc2691cd983a45 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Tue, 2 Apr 2013 18:39:53 +0200 Subject: [PATCH 044/303] tty: Documentation: fix a path in a DocBook template A wrong path to a driver breaks DocBook built. Signed-off-by: Guennadi Liakhovetski Acked-by: Randy Dunlap Acked-by: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- Documentation/DocBook/device-drivers.tmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/DocBook/device-drivers.tmpl b/Documentation/DocBook/device-drivers.tmpl index 7514dbf0a679..c36892c072da 100644 --- a/Documentation/DocBook/device-drivers.tmpl +++ b/Documentation/DocBook/device-drivers.tmpl @@ -227,7 +227,7 @@ X!Isound/sound_firmware.c 16x50 UART Driver !Edrivers/tty/serial/serial_core.c -!Edrivers/tty/serial/8250/8250.c +!Edrivers/tty/serial/8250/8250_core.c From 6d4f0139d642c45411a47879325891ce2a7c164a Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 27 Feb 2013 11:41:41 -0800 Subject: [PATCH 045/303] misc/vmw_vmci: Add dependency on CONFIG_NET Building the vmw_vmci driver with CONFIG_NET undefined results in: drivers/built-in.o: In function `__qp_memcpy_from_queue.isra.13': vmci_queue_pair.c:(.text+0x1671a8): undefined reference to `memcpy_toiovec' drivers/built-in.o: In function `__qp_memcpy_to_queue.isra.14': vmci_queue_pair.c:(.text+0x167341): undefined reference to `memcpy_fromiovec' make[1]: [vmlinux] Error 1 (ignored) since memcpy_toiovec and memcpy_fromiovec are defined in the networking code. Add the missing dependency. Signed-off-by: Guenter Roeck Acked-by: Randy Dunlap Signed-off-by: Greg Kroah-Hartman --- drivers/misc/vmw_vmci/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/vmw_vmci/Kconfig b/drivers/misc/vmw_vmci/Kconfig index 39c2ecadb273..ea98f7e9ccd1 100644 --- a/drivers/misc/vmw_vmci/Kconfig +++ b/drivers/misc/vmw_vmci/Kconfig @@ -4,7 +4,7 @@ config VMWARE_VMCI tristate "VMware VMCI Driver" - depends on X86 && PCI + depends on X86 && PCI && NET help This is VMware's Virtual Machine Communication Interface. It enables high-speed communication between host and guest in a virtual From c678ef5286ddb5cf70384ad5af286b0afc9b73e1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 3 Apr 2013 21:53:57 +0200 Subject: [PATCH 046/303] block: avoid using uninitialized value in from queue_var_store As found by gcc-4.8, the QUEUE_SYSFS_BIT_FNS macro creates functions that use a value generated by queue_var_store independent of whether that value was set or not. block/blk-sysfs.c: In function 'queue_store_nonrot': block/blk-sysfs.c:244:385: warning: 'val' may be used uninitialized in this function [-Wmaybe-uninitialized] Unlike most other such warnings, this one is not a false positive, writing any non-number string into the sysfs files indeed has an undefined result, rather than returning an error. Signed-off-by: Arnd Bergmann Signed-off-by: Jens Axboe --- block/blk-sysfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 6206a934eb8c..5efc5a647183 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -229,6 +229,8 @@ queue_store_##name(struct request_queue *q, const char *page, size_t count) \ unsigned long val; \ ssize_t ret; \ ret = queue_var_store(&val, page, count); \ + if (ret < 0) \ + return ret; \ if (neg) \ val = !val; \ \ From d0d096b1d8b97172cee953f1a00b7d4379d398da Mon Sep 17 00:00:00 2001 From: Asai Thambi S P Date: Wed, 3 Apr 2013 19:53:07 +0530 Subject: [PATCH 047/303] mtip32xx: recovery from command timeout To recover from command timeouts, reset the device. In addition to that improved timeout handling of PIO commands. Signed-off-by: Sam Bradshaw Signed-off-by: Asai Thambi S P Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 177 +++++++++++++++--------------- 1 file changed, 87 insertions(+), 90 deletions(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 92250af84e7d..30f233634650 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -243,40 +243,31 @@ static inline void release_slot(struct mtip_port *port, int tag) /* * Reset the HBA (without sleeping) * - * Just like hba_reset, except does not call sleep, so can be - * run from interrupt/tasklet context. - * * @dd Pointer to the driver data structure. * * return value * 0 The reset was successful. * -1 The HBA Reset bit did not clear. */ -static int hba_reset_nosleep(struct driver_data *dd) +static int mtip_hba_reset(struct driver_data *dd) { unsigned long timeout; - /* Chip quirk: quiesce any chip function */ - mdelay(10); - /* Set the reset bit */ writel(HOST_RESET, dd->mmio + HOST_CTL); /* Flush */ readl(dd->mmio + HOST_CTL); - /* - * Wait 10ms then spin for up to 1 second - * waiting for reset acknowledgement - */ - timeout = jiffies + msecs_to_jiffies(1000); - mdelay(10); - while ((readl(dd->mmio + HOST_CTL) & HOST_RESET) - && time_before(jiffies, timeout)) - mdelay(1); + /* Spin for up to 2 seconds, waiting for reset acknowledgement */ + timeout = jiffies + msecs_to_jiffies(2000); + do { + mdelay(10); + if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)) + return -1; - if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)) - return -1; + } while ((readl(dd->mmio + HOST_CTL) & HOST_RESET) + && time_before(jiffies, timeout)); if (readl(dd->mmio + HOST_CTL) & HOST_RESET) return -1; @@ -481,7 +472,7 @@ static void mtip_restart_port(struct mtip_port *port) dev_warn(&port->dd->pdev->dev, "PxCMD.CR not clear, escalating reset\n"); - if (hba_reset_nosleep(port->dd)) + if (mtip_hba_reset(port->dd)) dev_err(&port->dd->pdev->dev, "HBA reset escalation failed.\n"); @@ -527,6 +518,26 @@ static void mtip_restart_port(struct mtip_port *port) } +static int mtip_device_reset(struct driver_data *dd) +{ + int rv = 0; + + if (mtip_check_surprise_removal(dd->pdev)) + return 0; + + if (mtip_hba_reset(dd) < 0) + rv = -EFAULT; + + mdelay(1); + mtip_init_port(dd->port); + mtip_start_port(dd->port); + + /* Enable interrupts on the HBA. */ + writel(readl(dd->mmio + HOST_CTL) | HOST_IRQ_EN, + dd->mmio + HOST_CTL); + return rv; +} + /* * Helper function for tag logging */ @@ -632,7 +643,7 @@ static void mtip_timeout_function(unsigned long int data) if (cmdto_cnt) { print_tags(port->dd, "timed out", tagaccum, cmdto_cnt); if (!test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) { - mtip_restart_port(port); + mtip_device_reset(port->dd); wake_up_interruptible(&port->svc_wait); } clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags); @@ -1283,11 +1294,11 @@ static int mtip_exec_internal_command(struct mtip_port *port, int rv = 0, ready2go = 1; struct mtip_cmd *int_cmd = &port->commands[MTIP_TAG_INTERNAL]; unsigned long to; + struct driver_data *dd = port->dd; /* Make sure the buffer is 8 byte aligned. This is asic specific. */ if (buffer & 0x00000007) { - dev_err(&port->dd->pdev->dev, - "SG buffer is not 8 byte aligned\n"); + dev_err(&dd->pdev->dev, "SG buffer is not 8 byte aligned\n"); return -EFAULT; } @@ -1300,23 +1311,21 @@ static int mtip_exec_internal_command(struct mtip_port *port, mdelay(100); } while (time_before(jiffies, to)); if (!ready2go) { - dev_warn(&port->dd->pdev->dev, + dev_warn(&dd->pdev->dev, "Internal cmd active. new cmd [%02X]\n", fis->command); return -EBUSY; } set_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); port->ic_pause_timer = 0; - if (fis->command == ATA_CMD_SEC_ERASE_UNIT) - clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags); - else if (fis->command == ATA_CMD_DOWNLOAD_MICRO) - clear_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags); + clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags); + clear_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags); if (atomic == GFP_KERNEL) { if (fis->command != ATA_CMD_STANDBYNOW1) { /* wait for io to complete if non atomic */ if (mtip_quiesce_io(port, 5000) < 0) { - dev_warn(&port->dd->pdev->dev, + dev_warn(&dd->pdev->dev, "Failed to quiesce IO\n"); release_slot(port, MTIP_TAG_INTERNAL); clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); @@ -1361,58 +1370,84 @@ static int mtip_exec_internal_command(struct mtip_port *port, /* Issue the command to the hardware */ mtip_issue_non_ncq_command(port, MTIP_TAG_INTERNAL); - /* Poll if atomic, wait_for_completion otherwise */ if (atomic == GFP_KERNEL) { /* Wait for the command to complete or timeout. */ - if (wait_for_completion_timeout( + if (wait_for_completion_interruptible_timeout( &wait, - msecs_to_jiffies(timeout)) == 0) { - dev_err(&port->dd->pdev->dev, - "Internal command did not complete [%d] " - "within timeout of %lu ms\n", - atomic, timeout); - if (mtip_check_surprise_removal(port->dd->pdev) || + msecs_to_jiffies(timeout)) <= 0) { + if (rv == -ERESTARTSYS) { /* interrupted */ + dev_err(&dd->pdev->dev, + "Internal command [%02X] was interrupted after %lu ms\n", + fis->command, timeout); + rv = -EINTR; + goto exec_ic_exit; + } else if (rv == 0) /* timeout */ + dev_err(&dd->pdev->dev, + "Internal command did not complete [%02X] within timeout of %lu ms\n", + fis->command, timeout); + else + dev_err(&dd->pdev->dev, + "Internal command [%02X] wait returned code [%d] after %lu ms - unhandled\n", + fis->command, rv, timeout); + + if (mtip_check_surprise_removal(dd->pdev) || test_bit(MTIP_DDF_REMOVE_PENDING_BIT, - &port->dd->dd_flag)) { + &dd->dd_flag)) { + dev_err(&dd->pdev->dev, + "Internal command [%02X] wait returned due to SR\n", + fis->command); rv = -ENXIO; goto exec_ic_exit; } + mtip_device_reset(dd); /* recover from timeout issue */ rv = -EAGAIN; + goto exec_ic_exit; } } else { + u32 hba_stat, port_stat; + /* Spin for checking if command still outstanding */ timeout = jiffies + msecs_to_jiffies(timeout); while ((readl(port->cmd_issue[MTIP_TAG_INTERNAL]) & (1 << MTIP_TAG_INTERNAL)) && time_before(jiffies, timeout)) { - if (mtip_check_surprise_removal(port->dd->pdev)) { + if (mtip_check_surprise_removal(dd->pdev)) { rv = -ENXIO; goto exec_ic_exit; } if ((fis->command != ATA_CMD_STANDBYNOW1) && test_bit(MTIP_DDF_REMOVE_PENDING_BIT, - &port->dd->dd_flag)) { + &dd->dd_flag)) { rv = -ENXIO; goto exec_ic_exit; } - if (readl(port->mmio + PORT_IRQ_STAT) & PORT_IRQ_ERR) { - atomic_inc(&int_cmd->active); /* error */ - break; + port_stat = readl(port->mmio + PORT_IRQ_STAT); + if (!port_stat) + continue; + + if (port_stat & PORT_IRQ_ERR) { + dev_err(&dd->pdev->dev, + "Internal command [%02X] failed\n", + fis->command); + mtip_device_reset(dd); + rv = -EIO; + goto exec_ic_exit; + } else { + writel(port_stat, port->mmio + PORT_IRQ_STAT); + hba_stat = readl(dd->mmio + HOST_IRQ_STAT); + if (hba_stat) + writel(hba_stat, + dd->mmio + HOST_IRQ_STAT); } + break; } } - if (atomic_read(&int_cmd->active) > 1) { - dev_err(&port->dd->pdev->dev, - "Internal command [%02X] failed\n", fis->command); - rv = -EIO; - } if (readl(port->cmd_issue[MTIP_TAG_INTERNAL]) & (1 << MTIP_TAG_INTERNAL)) { rv = -ENXIO; - if (!test_bit(MTIP_DDF_REMOVE_PENDING_BIT, - &port->dd->dd_flag)) { - mtip_restart_port(port); + if (!test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)) { + mtip_device_reset(dd); rv = -EAGAIN; } } @@ -1724,7 +1759,8 @@ static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id, * -EINVAL Invalid parameters passed in, trim not supported * -EIO Error submitting trim request to hw */ -static int mtip_send_trim(struct driver_data *dd, unsigned int lba, unsigned int len) +static int mtip_send_trim(struct driver_data *dd, unsigned int lba, + unsigned int len) { int i, rv = 0; u64 tlba, tlen, sect_left; @@ -1810,45 +1846,6 @@ static bool mtip_hw_get_capacity(struct driver_data *dd, sector_t *sectors) return (bool) !!port->identify_valid; } -/* - * Reset the HBA. - * - * Resets the HBA by setting the HBA Reset bit in the Global - * HBA Control register. After setting the HBA Reset bit the - * function waits for 1 second before reading the HBA Reset - * bit to make sure it has cleared. If HBA Reset is not clear - * an error is returned. Cannot be used in non-blockable - * context. - * - * @dd Pointer to the driver data structure. - * - * return value - * 0 The reset was successful. - * -1 The HBA Reset bit did not clear. - */ -static int mtip_hba_reset(struct driver_data *dd) -{ - mtip_deinit_port(dd->port); - - /* Set the reset bit */ - writel(HOST_RESET, dd->mmio + HOST_CTL); - - /* Flush */ - readl(dd->mmio + HOST_CTL); - - /* Wait for reset to clear */ - ssleep(1); - - /* Check the bit has cleared */ - if (readl(dd->mmio + HOST_CTL) & HOST_RESET) { - dev_err(&dd->pdev->dev, - "Reset bit did not clear.\n"); - return -1; - } - - return 0; -} - /* * Display the identify command data. * From 6b06d35f3f63dbfde71c792b8584225f3d1fc7f2 Mon Sep 17 00:00:00 2001 From: Asai Thambi S P Date: Wed, 3 Apr 2013 19:54:35 +0530 Subject: [PATCH 048/303] mtip32xx: return 0 from pci probe in case of rebuild Fix to return 0 from pci probe in case of rebuild. If not, pci consider probe has failed, and crash during rmmod. Signed-off-by: Asai Thambi S P Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 30f233634650..ce4e47dcc153 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -4302,6 +4302,8 @@ static int mtip_pci_probe(struct pci_dev *pdev, instance++; if (rv != MTIP_FTL_REBUILD_MAGIC) set_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag); + else + rv = 0; /* device in rebuild state, return 0 from probe */ goto done; block_initialize_err: From 0caff00390db41c98976e65f2ebc3eeaa4861358 Mon Sep 17 00:00:00 2001 From: Asai Thambi S P Date: Wed, 3 Apr 2013 19:56:21 +0530 Subject: [PATCH 049/303] mtip32xx: Add debugfs entry device_status This patch adds a new debugfs entry 'device_status' in /sys/kernel/debug/rssd. The value of this entry shows devices online and those in the process of removing. Signed-off-by: Sam Bradshaw Signed-off-by: Asai Thambi S P Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 148 +++++++++++++++++++++++++++++- drivers/block/mtip32xx/mtip32xx.h | 18 ++-- 2 files changed, 154 insertions(+), 12 deletions(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index ce4e47dcc153..8517af8c0fcb 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -81,12 +81,17 @@ /* Device instance number, incremented each time a device is probed. */ static int instance; +struct list_head online_list; +struct list_head removing_list; +spinlock_t dev_lock; + /* * Global variable used to hold the major block device number * allocated in mtip_init(). */ static int mtip_major; static struct dentry *dfs_parent; +static struct dentry *dfs_device_status; static u32 cpu_use[NR_CPUS]; @@ -2707,6 +2712,100 @@ static ssize_t mtip_hw_show_status(struct device *dev, static DEVICE_ATTR(status, S_IRUGO, mtip_hw_show_status, NULL); +/* debugsfs entries */ + +static ssize_t show_device_status(struct device_driver *drv, char *buf) +{ + int size = 0; + struct driver_data *dd, *tmp; + unsigned long flags; + char id_buf[42]; + u16 status = 0; + + spin_lock_irqsave(&dev_lock, flags); + size += sprintf(&buf[size], "Devices Present:\n"); + list_for_each_entry_safe(dd, tmp, &online_list, online_list) { + if (dd && dd->pdev) { + if (dd->port && + dd->port->identify && + dd->port->identify_valid) { + strlcpy(id_buf, + (char *) (dd->port->identify + 10), 21); + status = *(dd->port->identify + 141); + } else { + memset(id_buf, 0, 42); + status = 0; + } + + if (dd->port && + test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags)) { + size += sprintf(&buf[size], + " device %s %s (ftl rebuild %d %%)\n", + dev_name(&dd->pdev->dev), + id_buf, + status); + } else { + size += sprintf(&buf[size], + " device %s %s\n", + dev_name(&dd->pdev->dev), + id_buf); + } + } + } + + size += sprintf(&buf[size], "Devices Being Removed:\n"); + list_for_each_entry_safe(dd, tmp, &removing_list, remove_list) { + if (dd && dd->pdev) { + if (dd->port && + dd->port->identify && + dd->port->identify_valid) { + strlcpy(id_buf, + (char *) (dd->port->identify+10), 21); + status = *(dd->port->identify + 141); + } else { + memset(id_buf, 0, 42); + status = 0; + } + + if (dd->port && + test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags)) { + size += sprintf(&buf[size], + " device %s %s (ftl rebuild %d %%)\n", + dev_name(&dd->pdev->dev), + id_buf, + status); + } else { + size += sprintf(&buf[size], + " device %s %s\n", + dev_name(&dd->pdev->dev), + id_buf); + } + } + } + spin_unlock_irqrestore(&dev_lock, flags); + + return size; +} + +static ssize_t mtip_hw_read_device_status(struct file *f, char __user *ubuf, + size_t len, loff_t *offset) +{ + int size = *offset; + char buf[MTIP_DFS_MAX_BUF_SIZE]; + + if (!len || *offset) + return 0; + + size += show_device_status(NULL, buf); + + *offset = size <= len ? size : len; + size = copy_to_user(ubuf, buf, *offset); + if (size) + return -EFAULT; + + return *offset; +} + static ssize_t mtip_hw_read_registers(struct file *f, char __user *ubuf, size_t len, loff_t *offset) { @@ -2801,6 +2900,13 @@ static ssize_t mtip_hw_read_flags(struct file *f, char __user *ubuf, return *offset; } +static const struct file_operations mtip_device_status_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = mtip_hw_read_device_status, + .llseek = no_llseek, +}; + static const struct file_operations mtip_regs_fops = { .owner = THIS_MODULE, .open = simple_open, @@ -4158,6 +4264,7 @@ static int mtip_pci_probe(struct pci_dev *pdev, const struct cpumask *node_mask; int cpu, i = 0, j = 0; int my_node = NUMA_NO_NODE; + unsigned long flags; /* Allocate memory for this devices private data. */ my_node = pcibus_to_node(pdev->bus); @@ -4215,6 +4322,9 @@ static int mtip_pci_probe(struct pci_dev *pdev, dd->pdev = pdev; dd->numa_node = my_node; + INIT_LIST_HEAD(&dd->online_list); + INIT_LIST_HEAD(&dd->remove_list); + memset(dd->workq_name, 0, 32); snprintf(dd->workq_name, 31, "mtipq%d", dd->instance); @@ -4304,6 +4414,12 @@ static int mtip_pci_probe(struct pci_dev *pdev, set_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag); else rv = 0; /* device in rebuild state, return 0 from probe */ + + /* Add to online list even if in ftl rebuild */ + spin_lock_irqsave(&dev_lock, flags); + list_add(&dd->online_list, &online_list); + spin_unlock_irqrestore(&dev_lock, flags); + goto done; block_initialize_err: @@ -4337,9 +4453,15 @@ static void mtip_pci_remove(struct pci_dev *pdev) { struct driver_data *dd = pci_get_drvdata(pdev); int counter = 0; + unsigned long flags; set_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag); + spin_lock_irqsave(&dev_lock, flags); + list_del_init(&dd->online_list); + list_add(&dd->remove_list, &removing_list); + spin_unlock_irqrestore(&dev_lock, flags); + if (mtip_check_surprise_removal(pdev)) { while (!test_bit(MTIP_DDF_CLEANUP_BIT, &dd->dd_flag)) { counter++; @@ -4365,6 +4487,10 @@ static void mtip_pci_remove(struct pci_dev *pdev) pci_disable_msi(pdev); + spin_lock_irqsave(&dev_lock, flags); + list_del_init(&dd->remove_list); + spin_unlock_irqrestore(&dev_lock, flags); + kfree(dd); pcim_iounmap_regions(pdev, 1 << MTIP_ABAR); } @@ -4512,6 +4638,11 @@ static int __init mtip_init(void) pr_info(MTIP_DRV_NAME " Version " MTIP_DRV_VERSION "\n"); + spin_lock_init(&dev_lock); + + INIT_LIST_HEAD(&online_list); + INIT_LIST_HEAD(&removing_list); + /* Allocate a major block device number to use with this driver. */ error = register_blkdev(0, MTIP_DRV_NAME); if (error <= 0) { @@ -4521,11 +4652,18 @@ static int __init mtip_init(void) } mtip_major = error; - if (!dfs_parent) { - dfs_parent = debugfs_create_dir("rssd", NULL); - if (IS_ERR_OR_NULL(dfs_parent)) { - pr_warn("Error creating debugfs parent\n"); - dfs_parent = NULL; + dfs_parent = debugfs_create_dir("rssd", NULL); + if (IS_ERR_OR_NULL(dfs_parent)) { + pr_warn("Error creating debugfs parent\n"); + dfs_parent = NULL; + } + if (dfs_parent) { + dfs_device_status = debugfs_create_file("device_status", + S_IRUGO, dfs_parent, NULL, + &mtip_device_status_fops); + if (IS_ERR_OR_NULL(dfs_device_status)) { + pr_err("Error creating device_status node\n"); + dfs_device_status = NULL; } } diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index 3bffff5f670c..8e8334c9dd0f 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -129,9 +129,9 @@ enum { MTIP_PF_EH_ACTIVE_BIT = 1, /* error handling */ MTIP_PF_SE_ACTIVE_BIT = 2, /* secure erase */ MTIP_PF_DM_ACTIVE_BIT = 3, /* download microcde */ - MTIP_PF_PAUSE_IO = ((1 << MTIP_PF_IC_ACTIVE_BIT) | \ - (1 << MTIP_PF_EH_ACTIVE_BIT) | \ - (1 << MTIP_PF_SE_ACTIVE_BIT) | \ + MTIP_PF_PAUSE_IO = ((1 << MTIP_PF_IC_ACTIVE_BIT) | + (1 << MTIP_PF_EH_ACTIVE_BIT) | + (1 << MTIP_PF_SE_ACTIVE_BIT) | (1 << MTIP_PF_DM_ACTIVE_BIT)), MTIP_PF_SVC_THD_ACTIVE_BIT = 4, @@ -144,9 +144,9 @@ enum { MTIP_DDF_REMOVE_PENDING_BIT = 1, MTIP_DDF_OVER_TEMP_BIT = 2, MTIP_DDF_WRITE_PROTECT_BIT = 3, - MTIP_DDF_STOP_IO = ((1 << MTIP_DDF_REMOVE_PENDING_BIT) | \ - (1 << MTIP_DDF_SEC_LOCK_BIT) | \ - (1 << MTIP_DDF_OVER_TEMP_BIT) | \ + MTIP_DDF_STOP_IO = ((1 << MTIP_DDF_REMOVE_PENDING_BIT) | + (1 << MTIP_DDF_SEC_LOCK_BIT) | + (1 << MTIP_DDF_OVER_TEMP_BIT) | (1 << MTIP_DDF_WRITE_PROTECT_BIT)), MTIP_DDF_CLEANUP_BIT = 5, @@ -180,7 +180,7 @@ struct mtip_work { #define MTIP_TRIM_TIMEOUT_MS 240000 #define MTIP_MAX_TRIM_ENTRIES 8 -#define MTIP_MAX_TRIM_ENTRY_LEN 0xfff8 +#define MTIP_MAX_TRIM_ENTRY_LEN 0xfff8 struct mtip_trim_entry { u32 lba; /* starting lba of region */ @@ -501,6 +501,10 @@ struct driver_data { atomic_t irq_workers_active; int isr_binding; + + struct list_head online_list; /* linkage for online list */ + + struct list_head remove_list; /* linkage for removing list */ }; #endif From 1f66396682a0be92e1c141bff4b006d7654832e1 Mon Sep 17 00:00:00 2001 From: Alexey Pelykh Date: Wed, 3 Apr 2013 14:31:46 -0400 Subject: [PATCH 050/303] OMAP/serial: Revert bad fix of Rx FIFO threshold granularity Partially reverts 1776fd059c40907297d6c26c51876575d63fd9e2 that introduced regression reported by Paul Walmsley. This commit restores setting granularity in SCR register and adds note about comments below being inconsistent with actual code. Signed-off-by: Alexey Pelykh Tested-by: Paul Walmsley Cc: Kevin Hilman Cc: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/omap-serial.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c index 4dc41408ecb7..30d4f7a783cd 100644 --- a/drivers/tty/serial/omap-serial.c +++ b/drivers/tty/serial/omap-serial.c @@ -886,6 +886,17 @@ serial_omap_set_termios(struct uart_port *port, struct ktermios *termios, serial_out(up, UART_MCR, up->mcr | UART_MCR_TCRTLR); /* FIFO ENABLE, DMA MODE */ + up->scr |= OMAP_UART_SCR_RX_TRIG_GRANU1_MASK; + /* + * NOTE: Setting OMAP_UART_SCR_RX_TRIG_GRANU1_MASK + * sets Enables the granularity of 1 for TRIGGER RX + * level. Along with setting RX FIFO trigger level + * to 1 (as noted below, 16 characters) and TLR[3:0] + * to zero this will result RX FIFO threshold level + * to 1 character, instead of 16 as noted in comment + * below. + */ + /* Set receive FIFO threshold to 16 characters and * transmit FIFO threshold to 16 spaces */ From 24ad0ef9c8b946ed2abe681e4e44f4a1e643d882 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 28 Mar 2013 11:07:29 +0000 Subject: [PATCH 051/303] PCI/ACPI: Always resume devices on ACPI wakeup notifications It turns out that the _Lxx control methods provided by some BIOSes clear the PME Status bit of PCI devices they handle, which means that pci_acpi_wake_dev() cannot really use that bit to check whether or not the device has signalled wakeup. One symptom of the problem is, for example, that when an affected PCI USB controller is runtime-suspended, then plugging in a new USB device into one of the controller's ports will not wake up the controller, which should happen. For this reason, make pci_acpi_wake_dev() always attempt to resume the device it is called for regardless of the device's PME Status bit value (that bit still has to be cleared if set at this point, though). Reported-by: Sarah Sharp Signed-off-by: Rafael J. Wysocki Signed-off-by: Bjorn Helgaas Acked-by: Matthew Garrett CC: stable@vger.kernel.org # v3.7+ --- drivers/pci/pci-acpi.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index dee5dddaa292..5147c210df52 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -53,14 +53,15 @@ static void pci_acpi_wake_dev(acpi_handle handle, u32 event, void *context) return; } - if (!pci_dev->pm_cap || !pci_dev->pme_support - || pci_check_pme_status(pci_dev)) { - if (pci_dev->pme_poll) - pci_dev->pme_poll = false; + /* Clear PME Status if set. */ + if (pci_dev->pme_support) + pci_check_pme_status(pci_dev); - pci_wakeup_event(pci_dev); - pm_runtime_resume(&pci_dev->dev); - } + if (pci_dev->pme_poll) + pci_dev->pme_poll = false; + + pci_wakeup_event(pci_dev); + pm_runtime_resume(&pci_dev->dev); if (pci_dev->subordinate) pci_pme_wakeup_bus(pci_dev->subordinate); From de7d5f729c72638f41d7c17487bccb1c570ff144 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 30 Mar 2013 23:38:02 +0100 Subject: [PATCH 052/303] PCI/PM: Disable runtime PM of PCIe ports The runtime PM of PCIe ports turns out to be quite fragile, as in some cases things work while in some other cases they don't and we don't seem to have a good way to determine whether or not they are going to work in advance. For this reason, avoid enabling runtime PM for PCIe ports by keeping their runtime PM reference counters always above 0 for the time being. When a PCIe port is suspended, it can no longer report events like hotplug, so hotplug below the port may not work, as in the bug report below. [bhelgaas: changelog, stable] Reference: https://bugzilla.kernel.org/show_bug.cgi?id=53811 Signed-off-by: Rafael J. Wysocki Signed-off-by: Bjorn Helgaas CC: stable@vger.kernel.org # v3.6+ --- drivers/pci/pcie/portdrv_pci.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index 08c243ab034e..ed4d09498337 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -184,14 +184,6 @@ static const struct dev_pm_ops pcie_portdrv_pm_ops = { #define PCIE_PORTDRV_PM_OPS NULL #endif /* !PM */ -/* - * PCIe port runtime suspend is broken for some chipsets, so use a - * black list to disable runtime PM for these chipsets. - */ -static const struct pci_device_id port_runtime_pm_black_list[] = { - { /* end: all zeroes */ } -}; - /* * pcie_portdrv_probe - Probe PCI-Express port devices * @dev: PCI-Express port device being probed @@ -225,16 +217,11 @@ static int pcie_portdrv_probe(struct pci_dev *dev, * it by default. */ dev->d3cold_allowed = false; - if (!pci_match_id(port_runtime_pm_black_list, dev)) - pm_runtime_put_noidle(&dev->dev); - return 0; } static void pcie_portdrv_remove(struct pci_dev *dev) { - if (!pci_match_id(port_runtime_pm_black_list, dev)) - pm_runtime_get_noresume(&dev->dev); pcie_port_device_remove(dev); pci_disable_device(dev); } From d8668fcb0b257d9fdcfbe5c172a99b8d85e1cd82 Mon Sep 17 00:00:00 2001 From: Shan Hai Date: Mon, 18 Mar 2013 10:30:43 +0800 Subject: [PATCH 053/303] libata: Use integer return value for atapi_command_packet_set The function returns type of ATAPI drives so it should return integer value. The commit 4dce8ba94c7 (libata: Use 'bool' return value for ata_id_XXX) since v2.6.39 changed the type of return value from int to bool, the change would cause all of the ATAPI class drives to be treated as TYPE_TAPE and the max_sectors of the drives to be set to 65535 because of the commit f8d8e5799b7(libata: increase 128 KB / cmd limit for ATAPI tape drives), for the function would return true for all ATAPI class drives and the TYPE_TAPE is defined as 0x01. Cc: stable@vger.kernel.org Signed-off-by: Shan Hai Signed-off-by: Jeff Garzik --- include/linux/ata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/ata.h b/include/linux/ata.h index 8f7a3d68371a..ee0bd9524055 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -954,7 +954,7 @@ static inline int atapi_cdb_len(const u16 *dev_id) } } -static inline bool atapi_command_packet_set(const u16 *dev_id) +static inline int atapi_command_packet_set(const u16 *dev_id) { return (dev_id[ATA_ID_CONFIG] >> 8) & 0x1f; } From a32450e127fc6e5ca6d958ceb3cfea4d30a00846 Mon Sep 17 00:00:00 2001 From: Shan Hai Date: Mon, 18 Mar 2013 10:30:44 +0800 Subject: [PATCH 054/303] libata: Set max sector to 65535 for Slimtype DVD A DS8A8SH drive The Slimtype DVD A DS8A8SH drive locks up when max sector is smaller than 65535, and the blow backtrace is observed on locking up: INFO: task flush-8:32:1130 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. flush-8:32 D ffffffff8180cf60 0 1130 2 0x00000000 ffff880273aef618 0000000000000046 0000000000000005 ffff880273aee000 ffff880273aee000 ffff880273aeffd8 ffff880273aee010 ffff880273aee000 ffff880273aeffd8 ffff880273aee000 ffff88026e842ea0 ffff880274a10000 Call Trace: [] schedule+0x5d/0x70 [] io_schedule+0x8c/0xd0 [] get_request+0x731/0x7d0 [] ? cfq_allow_merge+0x50/0x90 [] ? wake_up_bit+0x40/0x40 [] ? bio_attempt_back_merge+0x33/0x110 [] blk_queue_bio+0x23a/0x3f0 [] generic_make_request+0xc6/0x120 [] submit_bio+0x138/0x160 [] ? bio_alloc_bioset+0x96/0x120 [] submit_bh+0x1f1/0x220 [] __block_write_full_page+0x228/0x340 [] ? attach_nobh_buffers+0xc0/0xc0 [] ? I_BDEV+0x10/0x10 [] ? I_BDEV+0x10/0x10 [] block_write_full_page_endio+0xe6/0x100 [] block_write_full_page+0x15/0x20 [] blkdev_writepage+0x18/0x20 [] __writepage+0x17/0x40 [] write_cache_pages+0x34a/0x4a0 [] ? set_page_dirty+0x70/0x70 [] generic_writepages+0x51/0x80 [] do_writepages+0x20/0x50 [] __writeback_single_inode+0xa6/0x2b0 [] writeback_sb_inodes+0x311/0x4d0 [] __writeback_inodes_wb+0x86/0xd0 [] wb_writeback+0x1a3/0x330 [] ? _raw_spin_lock_irqsave+0x3f/0x50 [] ? get_nr_inodes+0x52/0x70 [] wb_do_writeback+0x1dc/0x260 [] ? schedule_timeout+0x204/0x240 [] bdi_writeback_thread+0x102/0x2b0 [] ? wb_do_writeback+0x260/0x260 [] kthread+0xc0/0xd0 [] ? kthread_worker_fn+0x1b0/0x1b0 [] ret_from_fork+0x7c/0xb0 [] ? kthread_worker_fn+0x1b0/0x1b0 The above trace was triggered by "dd if=/dev/zero of=/dev/sr0 bs=2048 count=32768" It was previously working by accident, since another bug introduced by 4dce8ba94c7 (libata: Use 'bool' return value for ata_id_XXX) caused all drives to use maxsect=65535. Cc: stable@vger.kernel.org Signed-off-by: Shan Hai Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 4 ++++ include/linux/libata.h | 1 + 2 files changed, 5 insertions(+) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 497adea1f0d6..0075944a64dc 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2439,6 +2439,9 @@ int ata_dev_configure(struct ata_device *dev) dev->max_sectors = min_t(unsigned int, ATA_MAX_SECTORS_128, dev->max_sectors); + if (dev->horkage & ATA_HORKAGE_MAX_SEC_LBA48) + dev->max_sectors = ATA_MAX_SECTORS_LBA48; + if (ap->ops->dev_config) ap->ops->dev_config(dev); @@ -4100,6 +4103,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { /* Weird ATAPI devices */ { "TORiSAN DVD-ROM DRD-N216", NULL, ATA_HORKAGE_MAX_SEC_128 }, { "QUANTUM DAT DAT72-000", NULL, ATA_HORKAGE_ATAPI_MOD16_DMA }, + { "Slimtype DVD A DS8A8SH", NULL, ATA_HORKAGE_MAX_SEC_LBA48 }, /* Devices we expect to fail diagnostics */ diff --git a/include/linux/libata.h b/include/linux/libata.h index 91c9d109e5f1..eae7a053dc51 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -398,6 +398,7 @@ enum { ATA_HORKAGE_NOSETXFER = (1 << 14), /* skip SETXFER, SATA only */ ATA_HORKAGE_BROKEN_FPDMA_AA = (1 << 15), /* skip AA */ ATA_HORKAGE_DUMP_ID = (1 << 16), /* dump IDENTIFY data */ + ATA_HORKAGE_MAX_SEC_LBA48 = (1 << 17), /* Set max sects to 65535 */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ From b55f84e2d527182e7c611d466cd0bb6ddce201de Mon Sep 17 00:00:00 2001 From: Youquan Song Date: Wed, 6 Mar 2013 10:49:05 -0500 Subject: [PATCH 055/303] ata_piix: Fix DVD not dectected at some Haswell platforms There is a quirk patch 5e5a4f5d5a08c9c504fe956391ac3dae2c66556d "ata_piix: make DVD Drive recognisable on systems with Intel Sandybridge chipsets(v2)" fixing the 4 ports IDE controller 32bit PIO mode. We've hit a problem with DVD not recognized on Haswell Desktop platform which includes Lynx Point 2-port SATA controller. This quirk patch disables 32bit PIO on this controller in IDE mode. v2: Change spelling error in statememnt pointed by Sergei Shtylyov. v3: Change comment statememnt and spliting line over 80 characters pointed by Libor Pechacek and also rebase the patch against 3.8-rc7 kernel. Tested-by: Lee, Chun-Yi Signed-off-by: Youquan Song Cc: stable@vger.kernel.org Signed-off-by: Jeff Garzik --- drivers/ata/ata_piix.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c index ffdd32d22602..2f48123d74c4 100644 --- a/drivers/ata/ata_piix.c +++ b/drivers/ata/ata_piix.c @@ -150,6 +150,7 @@ enum piix_controller_ids { tolapai_sata, piix_pata_vmw, /* PIIX4 for VMware, spurious DMA_ERR */ ich8_sata_snb, + ich8_2port_sata_snb, }; struct piix_map_db { @@ -304,7 +305,7 @@ static const struct pci_device_id piix_pci_tbl[] = { /* SATA Controller IDE (Lynx Point) */ { 0x8086, 0x8c01, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_snb }, /* SATA Controller IDE (Lynx Point) */ - { 0x8086, 0x8c08, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata }, + { 0x8086, 0x8c08, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata_snb }, /* SATA Controller IDE (Lynx Point) */ { 0x8086, 0x8c09, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata }, /* SATA Controller IDE (Lynx Point-LP) */ @@ -439,6 +440,7 @@ static const struct piix_map_db *piix_map_db_table[] = { [ich8m_apple_sata] = &ich8m_apple_map_db, [tolapai_sata] = &tolapai_map_db, [ich8_sata_snb] = &ich8_map_db, + [ich8_2port_sata_snb] = &ich8_2port_map_db, }; static struct pci_bits piix_enable_bits[] = { @@ -1242,6 +1244,16 @@ static struct ata_port_info piix_port_info[] = { .udma_mask = ATA_UDMA6, .port_ops = &piix_sata_ops, }, + + [ich8_2port_sata_snb] = + { + .flags = PIIX_SATA_FLAGS | PIIX_FLAG_SIDPR + | PIIX_FLAG_PIO16, + .pio_mask = ATA_PIO4, + .mwdma_mask = ATA_MWDMA2, + .udma_mask = ATA_UDMA6, + .port_ops = &piix_sata_ops, + }, }; #define AHCI_PCI_BAR 5 From 8e725c7f8a60feaa88edacd4dee2c754d5ae7706 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 29 Mar 2013 11:54:55 +0000 Subject: [PATCH 056/303] libata: fix DMA to stack in reading devslp_timing parameters Commit 803739d25c2343da6d2f95eebdcbc08bf67097d4 ("[libata] replace sata_settings with devslp_timing"), which was also Cc: stable, used a stack buffer to receive data from ata_read_log_page(), which triggers the following warning: ahci 0000:00:1f.2: DMA-API: device driver maps memory fromstack [addr=ffff880140469948] Fix this by using ap->sector_buf instead of a stack buffer. Signed-off-by: David Woodhouse Cc: stable@kernel.org Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 0075944a64dc..63c743baf920 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2329,7 +2329,7 @@ int ata_dev_configure(struct ata_device *dev) * from SATA Settings page of Identify Device Data Log. */ if (ata_id_has_devslp(dev->id)) { - u8 sata_setting[ATA_SECT_SIZE]; + u8 *sata_setting = ap->sector_buf; int i, j; dev->flags |= ATA_DFLAG_DEVSLP; From 6d3bfc7be6f80d0c6ee6800d58d573343bf6e260 Mon Sep 17 00:00:00 2001 From: Krzysztof Mazur Date: Wed, 27 Mar 2013 13:51:14 +0100 Subject: [PATCH 057/303] [libata] Fix HDIO_DRIVE_* ioctl() Linux 3.9 regression On Mon, Mar 25, 2013 at 06:26:50PM +0100, Ronald wrote: > In reply to [1]: I have the same issue. Git bisect took 50+ rebuilds xD > > Smartd does not work anymore since 84a9a8cd9 ([libata] Set proper SK > when CK_COND is set.). > [1] http://www.spinics.net/lists/linux-ide/msg45268.html It seems that the SAM_STAT_CHECK_CONDITION is not cleared causing -EIO, because that patch modified sensebuf and the check for clearing SAM_STAT_CHECK_CONDITION is no longer valid. Fix that. Signed-off-by: Jeff Garzik --- drivers/ata/libata-scsi.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 318b41358187..ff44787e5a45 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -532,8 +532,8 @@ int ata_cmd_ioctl(struct scsi_device *scsidev, void __user *arg) struct scsi_sense_hdr sshdr; scsi_normalize_sense(sensebuf, SCSI_SENSE_BUFFERSIZE, &sshdr); - if (sshdr.sense_key == 0 && - sshdr.asc == 0 && sshdr.ascq == 0) + if (sshdr.sense_key == RECOVERED_ERROR && + sshdr.asc == 0 && sshdr.ascq == 0x1d) cmd_result &= ~SAM_STAT_CHECK_CONDITION; } @@ -618,8 +618,8 @@ int ata_task_ioctl(struct scsi_device *scsidev, void __user *arg) struct scsi_sense_hdr sshdr; scsi_normalize_sense(sensebuf, SCSI_SENSE_BUFFERSIZE, &sshdr); - if (sshdr.sense_key == 0 && - sshdr.asc == 0 && sshdr.ascq == 0) + if (sshdr.sense_key == RECOVERED_ERROR && + sshdr.asc == 0 && sshdr.ascq == 0x1d) cmd_result &= ~SAM_STAT_CHECK_CONDITION; } From c66bb3f075cfe2d17b2427e96e043622db02759c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 4 Apr 2013 09:03:41 +0200 Subject: [PATCH 058/303] mtip32xx: fix two smatch warnings Dan reports: New smatch warnings: drivers/block/mtip32xx/mtip32xx.c:2728 show_device_status() warn: variable dereferenced before check 'dd' (see line 2727) drivers/block/mtip32xx/mtip32xx.c:2758 show_device_status() warn: variable dereferenced before check 'dd' (see line 2757) which are checking if dd == NULL, in a list_for_each_entry() type loop. Get rid of the check, dd can never be NULL here. Reported-by: Dan Carpenter Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 8517af8c0fcb..32c678028e53 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -2725,7 +2725,7 @@ static ssize_t show_device_status(struct device_driver *drv, char *buf) spin_lock_irqsave(&dev_lock, flags); size += sprintf(&buf[size], "Devices Present:\n"); list_for_each_entry_safe(dd, tmp, &online_list, online_list) { - if (dd && dd->pdev) { + if (dd->pdev) { if (dd->port && dd->port->identify && dd->port->identify_valid) { @@ -2755,7 +2755,7 @@ static ssize_t show_device_status(struct device_driver *drv, char *buf) size += sprintf(&buf[size], "Devices Being Removed:\n"); list_for_each_entry_safe(dd, tmp, &removing_list, remove_list) { - if (dd && dd->pdev) { + if (dd->pdev) { if (dd->port && dd->port->identify && dd->port->identify_valid) { From b39599b7cb8f295205b04827ff82e3afec687cde Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Wed, 6 Mar 2013 16:56:48 -0600 Subject: [PATCH 059/303] remoteproc: fix the error check for idr_alloc The new idr_alloc interface returns the allocated id back on success, so fix the error path to check for negative values. This was missed out in the newer idr interface adoption patch, 15fc611 "remoteproc: convert to idr_alloc()". Signed-off-by: Suman Anna Acked-by: Tejun Heo Signed-off-by: Ohad Ben-Cohen --- drivers/remoteproc/remoteproc_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index 29387df4bfc9..fb0fb342bf96 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -217,7 +217,7 @@ int rproc_alloc_vring(struct rproc_vdev *rvdev, int i) * TODO: support predefined notifyids (via resource table) */ ret = idr_alloc(&rproc->notifyids, rvring, 0, 0, GFP_KERNEL); - if (ret) { + if (ret < 0) { dev_err(dev, "idr_alloc failed: %d\n", ret); dma_free_coherent(dev->parent, size, va, dma); return ret; From 1cd425b660bd5b4f41b9175b0b7bf3828ce88144 Mon Sep 17 00:00:00 2001 From: Dmitry Tarnyagin Date: Sun, 10 Feb 2013 12:39:04 +0100 Subject: [PATCH 060/303] remoteproc/ste: fix memory leak on shutdown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes coherent memory leakage, caused by non-deallocated firmware image chunk. Cc: stable@vger.kernel.org Signed-off-by: Dmitry Tarnyagin Signed-off-by: Sjur Brændeland Acked-by: Ido Yariv [slightly edit subject and commit log] Signed-off-by: Ohad Ben-Cohen --- drivers/remoteproc/ste_modem_rproc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/remoteproc/ste_modem_rproc.c b/drivers/remoteproc/ste_modem_rproc.c index a7743c069339..fb95c4220052 100644 --- a/drivers/remoteproc/ste_modem_rproc.c +++ b/drivers/remoteproc/ste_modem_rproc.c @@ -240,6 +240,8 @@ static int sproc_drv_remove(struct platform_device *pdev) /* Unregister as remoteproc device */ rproc_del(sproc->rproc); + dma_free_coherent(sproc->rproc->dev.parent, SPROC_FW_SIZE, + sproc->fw_addr, sproc->fw_dma_addr); rproc_put(sproc->rproc); mdev->drv_data = NULL; @@ -297,10 +299,13 @@ static int sproc_probe(struct platform_device *pdev) /* Register as a remoteproc device */ err = rproc_add(rproc); if (err) - goto free_rproc; + goto free_mem; return 0; +free_mem: + dma_free_coherent(rproc->dev.parent, SPROC_FW_SIZE, + sproc->fw_addr, sproc->fw_dma_addr); free_rproc: /* Reset device data upon error */ mdev->drv_data = NULL; From cde42e076c1cdd69a1f955d66ff6596bad3d2105 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sjur=20Br=C3=A6ndeland?= Date: Thu, 21 Feb 2013 18:15:32 +0100 Subject: [PATCH 061/303] remoteproc: fix error path of handle_vdev MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the vdev entry from the list before freeing it, otherwise rproc->vdevs will explode. Cc: stable@vger.kernel.org Signed-off-by: Sjur Brændeland Acked-by: Ido Yariv [edit subject, minor commit log edit, cc stable] Signed-off-by: Ohad Ben-Cohen --- drivers/remoteproc/remoteproc_core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index fb0fb342bf96..8edb4aed5d36 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -366,10 +366,12 @@ static int rproc_handle_vdev(struct rproc *rproc, struct fw_rsc_vdev *rsc, /* it is now safe to add the virtio device */ ret = rproc_add_virtio_dev(rvdev, rsc->id); if (ret) - goto free_rvdev; + goto remove_rvdev; return 0; +remove_rvdev: + list_del(&rvdev->node); free_rvdev: kfree(rvdev); return ret; From d47cbd5bcee7c7a08fc0283dda465375fa1b1fda Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 5 Apr 2013 14:06:35 +0200 Subject: [PATCH 062/303] crypto: ux500 - add missing comma Commit 4f31f5b19eb0418a847b989abc9ac22af1991fe2 "PM / crypto / ux500: Use struct dev_pm_ops for power management" add a new line to the driver struct but missed to add a trailing comma, causing build errors when crypto is selected. This adds the missing comma. This was not noticed until now because the crypto block is not in the ux500 defconfig. A separate patch will be submitted to fix this. Cc: # 3.8.x Cc: Rafael J. Wysocki Cc: Magnus Myrstedt Signed-off-by: Linus Walleij Signed-off-by: Herbert Xu --- drivers/crypto/ux500/cryp/cryp_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/ux500/cryp/cryp_core.c b/drivers/crypto/ux500/cryp/cryp_core.c index 8bc5fef07e7a..22c9063e0120 100644 --- a/drivers/crypto/ux500/cryp/cryp_core.c +++ b/drivers/crypto/ux500/cryp/cryp_core.c @@ -1750,7 +1750,7 @@ static struct platform_driver cryp_driver = { .shutdown = ux500_cryp_shutdown, .driver = { .owner = THIS_MODULE, - .name = "cryp1" + .name = "cryp1", .pm = &ux500_cryp_pm, } }; From b844fe691897221ad0d5e0279c8ea9e3e4a46982 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 5 Apr 2013 15:36:32 +0100 Subject: [PATCH 063/303] dm cache: fix writes to cache device in writethrough mode The dm-cache writethrough strategy introduced by commit e2e74d617eadc15 ("dm cache: fix race in writethrough implementation") issues a bio to the origin device, remaps and then issues the bio to the cache device. This more conservative in-series approach was selected to favor correctness over performance (of the previous parallel writethrough). However, this in-series implementation that reuses the same bio to write both the origin and cache device didn't take into account that the block layer's req_bio_endio() modifies a completing bio's bi_sector and bi_size. So the new writethrough strategy needs to preserve these bio fields, and restore them before submission to the cache device, otherwise nothing gets written to the cache (because bi_size is 0). This patch adds a struct dm_bio_details field to struct per_bio_data, and uses dm_bio_record() and dm_bio_restore() to ensure the bio is restored before reissuing to the cache device. Adding such a large structure to the per_bio_data is not ideal but we can improve this later, for now correctness is the important thing. This problem initially went unnoticed because the dm-cache test-suite uses a linear DM device for the dm-cache device's origin device. Writethrough worked as expected because DM submits a *clone* of the original bio, so the original bio which was reused for the cache was never touched. Signed-off-by: Darrick J. Wong Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-cache-target.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 66120bd46d15..1ab122a75764 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -6,6 +6,7 @@ #include "dm.h" #include "dm-bio-prison.h" +#include "dm-bio-record.h" #include "dm-cache-metadata.h" #include @@ -205,6 +206,7 @@ struct per_bio_data { struct cache *cache; dm_cblock_t cblock; bio_end_io_t *saved_bi_end_io; + struct dm_bio_details bio_details; }; struct dm_cache_migration { @@ -643,6 +645,7 @@ static void writethrough_endio(struct bio *bio, int err) return; } + dm_bio_restore(&pb->bio_details, bio); remap_to_cache(pb->cache, bio, pb->cblock); /* @@ -667,6 +670,7 @@ static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio, pb->cache = cache; pb->cblock = cblock; pb->saved_bi_end_io = bio->bi_end_io; + dm_bio_record(&pb->bio_details, bio); bio->bi_end_io = writethrough_endio; remap_to_origin_clear_discard(pb->cache, bio, oblock); From 19b0092e265fe9ab129902373c3127c0e0be3376 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 5 Apr 2013 15:36:34 +0100 Subject: [PATCH 064/303] dm cache: reduce bio front_pad size in writeback mode A recent patch to fix the dm cache target's writethrough mode extended the bio's front_pad to include a 1056-byte struct dm_bio_details. Writeback mode doesn't need this, so this patch reduces the per_bio_data_size to 16 bytes in this case instead of 1096. The dm_bio_details structure was added in "dm cache: fix writes to cache device in writethrough mode" which fixed commit e2e74d617e ("dm cache: fix race in writethrough implementation"). In writeback mode we avoid allocating the writethrough-specific members of the per_bio_data structure (the dm_bio_details structure included). Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-cache-target.c | 47 ++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 1ab122a75764..10744091e6ca 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -202,7 +202,11 @@ struct per_bio_data { unsigned req_nr:2; struct dm_deferred_entry *all_io_entry; - /* writethrough fields */ + /* + * writethrough fields. These MUST remain at the end of this + * structure and the 'cache' member must be the first as it + * is used to determine the offsetof the writethrough fields. + */ struct cache *cache; dm_cblock_t cblock; bio_end_io_t *saved_bi_end_io; @@ -515,16 +519,28 @@ static void save_stats(struct cache *cache) /*---------------------------------------------------------------- * Per bio data *--------------------------------------------------------------*/ -static struct per_bio_data *get_per_bio_data(struct bio *bio) + +/* + * If using writeback, leave out struct per_bio_data's writethrough fields. + */ +#define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache)) +#define PB_DATA_SIZE_WT (sizeof(struct per_bio_data)) + +static size_t get_per_bio_data_size(struct cache *cache) { - struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data)); + return cache->features.write_through ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB; +} + +static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size) +{ + struct per_bio_data *pb = dm_per_bio_data(bio, data_size); BUG_ON(!pb); return pb; } -static struct per_bio_data *init_per_bio_data(struct bio *bio) +static struct per_bio_data *init_per_bio_data(struct bio *bio, size_t data_size) { - struct per_bio_data *pb = get_per_bio_data(bio); + struct per_bio_data *pb = get_per_bio_data(bio, data_size); pb->tick = false; pb->req_nr = dm_bio_get_target_bio_nr(bio); @@ -558,7 +574,8 @@ static void remap_to_cache(struct cache *cache, struct bio *bio, static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio) { unsigned long flags; - struct per_bio_data *pb = get_per_bio_data(bio); + size_t pb_data_size = get_per_bio_data_size(cache); + struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); spin_lock_irqsave(&cache->lock, flags); if (cache->need_tick_bio && @@ -637,7 +654,7 @@ static void defer_writethrough_bio(struct cache *cache, struct bio *bio) static void writethrough_endio(struct bio *bio, int err) { - struct per_bio_data *pb = get_per_bio_data(bio); + struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT); bio->bi_end_io = pb->saved_bi_end_io; if (err) { @@ -665,7 +682,7 @@ static void writethrough_endio(struct bio *bio, int err) static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio, dm_oblock_t oblock, dm_cblock_t cblock) { - struct per_bio_data *pb = get_per_bio_data(bio); + struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT); pb->cache = cache; pb->cblock = cblock; @@ -1039,7 +1056,8 @@ static void defer_bio(struct cache *cache, struct bio *bio) static void process_flush_bio(struct cache *cache, struct bio *bio) { - struct per_bio_data *pb = get_per_bio_data(bio); + size_t pb_data_size = get_per_bio_data_size(cache); + struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); BUG_ON(bio->bi_size); if (!pb->req_nr) @@ -1111,7 +1129,8 @@ static void process_bio(struct cache *cache, struct prealloc *structs, dm_oblock_t block = get_bio_block(cache, bio); struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell; struct policy_result lookup_result; - struct per_bio_data *pb = get_per_bio_data(bio); + size_t pb_data_size = get_per_bio_data_size(cache); + struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); bool discarded_block = is_discarded_oblock(cache, block); bool can_migrate = discarded_block || spare_migration_bandwidth(cache); @@ -1885,7 +1904,6 @@ static int cache_create(struct cache_args *ca, struct cache **result) cache->ti = ca->ti; ti->private = cache; - ti->per_bio_data_size = sizeof(struct per_bio_data); ti->num_flush_bios = 2; ti->flush_supported = true; @@ -1894,6 +1912,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) ti->discard_zeroes_data_unsupported = true; memcpy(&cache->features, &ca->features, sizeof(cache->features)); + ti->per_bio_data_size = get_per_bio_data_size(cache); cache->callbacks.congested_fn = cache_is_congested; dm_table_add_target_callbacks(ti->table, &cache->callbacks); @@ -2096,6 +2115,7 @@ static int cache_map(struct dm_target *ti, struct bio *bio) int r; dm_oblock_t block = get_bio_block(cache, bio); + size_t pb_data_size = get_per_bio_data_size(cache); bool can_migrate = false; bool discarded_block; struct dm_bio_prison_cell *cell; @@ -2112,7 +2132,7 @@ static int cache_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_REMAPPED; } - pb = init_per_bio_data(bio); + pb = init_per_bio_data(bio, pb_data_size); if (bio->bi_rw & (REQ_FLUSH | REQ_FUA | REQ_DISCARD)) { defer_bio(cache, bio); @@ -2197,7 +2217,8 @@ static int cache_end_io(struct dm_target *ti, struct bio *bio, int error) { struct cache *cache = ti->private; unsigned long flags; - struct per_bio_data *pb = get_per_bio_data(bio); + size_t pb_data_size = get_per_bio_data_size(cache); + struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); if (pb->tick) { policy_tick(cache->policy); From c10b90d85a5126d25c89cbaa50dc9fdd1c4d001a Mon Sep 17 00:00:00 2001 From: Li Fei Date: Fri, 5 Apr 2013 21:20:36 +0800 Subject: [PATCH 065/303] hwspinlock: fix __hwspin_lock_request error path Even in failed case of pm_runtime_get_sync, the usage_count is incremented. In order to keep the usage_count with correct value and runtime power management to behave correctly, call pm_runtime_put_noidle in such case. In __hwspin_lock_request, module_put is also called before return in pm_runtime_get_sync failed case. Cc: stable@vger.kernel.org Signed-off-by Liu Chuansheng Signed-off-by: Li Fei [edit commit log] Signed-off-by: Ohad Ben-Cohen --- drivers/hwspinlock/hwspinlock_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hwspinlock/hwspinlock_core.c b/drivers/hwspinlock/hwspinlock_core.c index db713c0dfba4..461a0d739d75 100644 --- a/drivers/hwspinlock/hwspinlock_core.c +++ b/drivers/hwspinlock/hwspinlock_core.c @@ -416,6 +416,8 @@ static int __hwspin_lock_request(struct hwspinlock *hwlock) ret = pm_runtime_get_sync(dev); if (ret < 0) { dev_err(dev, "%s: can't power on device\n", __func__); + pm_runtime_put_noidle(dev); + module_put(dev->driver->owner); return ret; } From a58e0be6f6b3eb2079b0b8fedc9df6fa86869f1e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 22 Mar 2013 12:52:59 -0400 Subject: [PATCH 066/303] SUNRPC: Remove extra xprt_put() While testing error cases where rpc_new_client() fails, I saw some oopses. If rpc_new_client() fails, it already invokes xprt_put(). Thus __rpc_clone_client() does not need to invoke it again. Introduced by commit 1b63a751 "SUNRPC: Refactor rpc_clone_client()" Fri Sep 14, 2012. Signed-off-by: Chuck Lever Cc: stable@vger.kernel.org [>=3.7] Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index dcc446e7fbf6..9df26b785dc7 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -512,7 +512,7 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args, new = rpc_new_client(args, xprt); if (IS_ERR(new)) { err = PTR_ERR(new); - goto out_put; + goto out_err; } atomic_inc(&clnt->cl_count); @@ -525,8 +525,6 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args, new->cl_chatty = clnt->cl_chatty; return new; -out_put: - xprt_put(xprt); out_err: dprintk("RPC: %s: returned error %d\n", __func__, err); return ERR_PTR(err); From b193d59a4863ea670872d76dc99231ddeb598625 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 4 Apr 2013 15:55:00 -0400 Subject: [PATCH 067/303] NFSv4: Fix a memory leak in nfs4_discover_server_trunking When we assign a new rpc_client to clp->cl_rpcclient, we need to destroy the old one. Signed-off-by: Trond Myklebust Cc: Chuck Lever Cc: stable@vger.kernel.org [>=3.7] --- fs/nfs/nfs4state.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 6ace365c6334..d41a3518509f 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1886,7 +1886,13 @@ again: status = PTR_ERR(clnt); break; } - clp->cl_rpcclient = clnt; + /* Note: this is safe because we haven't yet marked the + * client as ready, so we are the only user of + * clp->cl_rpcclient + */ + clnt = xchg(&clp->cl_rpcclient, clnt); + rpc_shutdown_client(clnt); + clnt = clp->cl_rpcclient; goto again; case -NFS4ERR_MINOR_VERS_MISMATCH: From 7b1f1fd1842e6ede25183c267ae733a7f67f00bc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 5 Apr 2013 16:11:11 -0400 Subject: [PATCH 068/303] NFSv4/4.1: Fix bugs in nfs4[01]_walk_client_list It is unsafe to use list_for_each_entry_safe() here, because when we drop the nn->nfs_client_lock, we pin the _current_ list entry and ensure that it stays in the list, but we don't do the same for the _next_ list entry. Use of list_for_each_entry() is therefore the correct thing to do. Also fix the refcounting in nfs41_walk_client_list(). Finally, ensure that the nfs_client has finished being initialised and, in the case of NFSv4.1, that the session is set up. Signed-off-by: Trond Myklebust Cc: Chuck Lever Cc: Bryan Schumaker Cc: stable@vger.kernel.org [>= 3.7] --- fs/nfs/nfs4client.c | 44 ++++++++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index ac4fc9a8fdbc..c7b346f8cc44 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -300,7 +300,7 @@ int nfs40_walk_client_list(struct nfs_client *new, struct rpc_cred *cred) { struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id); - struct nfs_client *pos, *n, *prev = NULL; + struct nfs_client *pos, *prev = NULL; struct nfs4_setclientid_res clid = { .clientid = new->cl_clientid, .confirm = new->cl_confirm, @@ -308,10 +308,23 @@ int nfs40_walk_client_list(struct nfs_client *new, int status = -NFS4ERR_STALE_CLIENTID; spin_lock(&nn->nfs_client_lock); - list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) { + list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) { /* If "pos" isn't marked ready, we can't trust the * remaining fields in "pos" */ - if (pos->cl_cons_state < NFS_CS_READY) + if (pos->cl_cons_state > NFS_CS_READY) { + atomic_inc(&pos->cl_count); + spin_unlock(&nn->nfs_client_lock); + + if (prev) + nfs_put_client(prev); + prev = pos; + + status = nfs_wait_client_init_complete(pos); + spin_lock(&nn->nfs_client_lock); + if (status < 0) + continue; + } + if (pos->cl_cons_state != NFS_CS_READY) continue; if (pos->rpc_ops != new->rpc_ops) @@ -423,16 +436,16 @@ int nfs41_walk_client_list(struct nfs_client *new, struct rpc_cred *cred) { struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id); - struct nfs_client *pos, *n, *prev = NULL; + struct nfs_client *pos, *prev = NULL; int status = -NFS4ERR_STALE_CLIENTID; spin_lock(&nn->nfs_client_lock); - list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) { + list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) { /* If "pos" isn't marked ready, we can't trust the * remaining fields in "pos", especially the client * ID and serverowner fields. Wait for CREATE_SESSION * to finish. */ - if (pos->cl_cons_state < NFS_CS_READY) { + if (pos->cl_cons_state > NFS_CS_READY) { atomic_inc(&pos->cl_count); spin_unlock(&nn->nfs_client_lock); @@ -440,18 +453,17 @@ int nfs41_walk_client_list(struct nfs_client *new, nfs_put_client(prev); prev = pos; - nfs4_schedule_lease_recovery(pos); status = nfs_wait_client_init_complete(pos); - if (status < 0) { - nfs_put_client(pos); - spin_lock(&nn->nfs_client_lock); - continue; + if (status == 0) { + nfs4_schedule_lease_recovery(pos); + status = nfs4_wait_clnt_recover(pos); } - status = pos->cl_cons_state; spin_lock(&nn->nfs_client_lock); if (status < 0) continue; } + if (pos->cl_cons_state != NFS_CS_READY) + continue; if (pos->rpc_ops != new->rpc_ops) continue; @@ -469,17 +481,17 @@ int nfs41_walk_client_list(struct nfs_client *new, continue; atomic_inc(&pos->cl_count); - spin_unlock(&nn->nfs_client_lock); + *result = pos; dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", __func__, pos, atomic_read(&pos->cl_count)); - - *result = pos; - return 0; + break; } /* No matching nfs_client found. */ spin_unlock(&nn->nfs_client_lock); dprintk("NFS: <-- %s status = %d\n", __func__, status); + if (prev) + nfs_put_client(prev); return status; } #endif /* CONFIG_NFS_V4_1 */ From 918708245e92941df16a634dc201b407d12bcd91 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 3 Apr 2013 15:47:33 +0100 Subject: [PATCH 069/303] x86: Fix rebuild with EFI_STUB enabled eboot.o and efi_stub_$(BITS).o didn't get added to "targets", and hence their .cmd files don't get included by the build machinery, leading to the files always getting rebuilt. Rather than adding the two files individually, take the opportunity and add $(VMLINUX_OBJS) to "targets" instead, thus allowing the assignment at the top of the file to be shrunk quite a bit. At the same time, remove a pointless flags override line - the variable assigned to was misspelled anyway, and the options added are meaningless for assembly sources. [ hpa: the patch is not minimal, but I am taking it for -urgent anyway since the excess impact of the patch seems to be small enough. ] Signed-off-by: Jan Beulich Link: http://lkml.kernel.org/r/515C5D2502000078000CA6AD@nat28.tlf.novell.com Cc: Matthew Garrett Cc: Matt Fleming Signed-off-by: H. Peter Anvin --- arch/x86/boot/compressed/Makefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 8a84501acb1b..5ef205c5f37b 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -4,7 +4,7 @@ # create a compressed vmlinux image from the original vmlinux # -targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma vmlinux.bin.xz vmlinux.bin.lzo head_$(BITS).o misc.o string.o cmdline.o early_serial_console.o piggy.o +targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma vmlinux.bin.xz vmlinux.bin.lzo KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 KBUILD_CFLAGS += -fno-strict-aliasing -fPIC @@ -29,7 +29,6 @@ VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \ $(obj)/piggy.o $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone -$(obj)/efi_stub_$(BITS).o: KBUILD_CLFAGS += -fshort-wchar -mno-red-zone ifeq ($(CONFIG_EFI_STUB), y) VMLINUX_OBJS += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o @@ -43,7 +42,7 @@ OBJCOPYFLAGS_vmlinux.bin := -R .comment -S $(obj)/vmlinux.bin: vmlinux FORCE $(call if_changed,objcopy) -targets += vmlinux.bin.all vmlinux.relocs +targets += $(patsubst $(obj)/%,%,$(VMLINUX_OBJS)) vmlinux.bin.all vmlinux.relocs CMD_RELOCS = arch/x86/tools/relocs quiet_cmd_relocs = RELOCS $@ From f05c124a70a4953a66acbd6d6c601ea1eb5d0fa7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 5 Apr 2013 14:13:21 -0400 Subject: [PATCH 070/303] SUNRPC: Fix a potential memory leak in rpc_new_client If the call to rpciod_up() fails, we currently leak a reference to the struct rpc_xprt. As part of the fix, we also remove the redundant check for xprt!=NULL. This is already taken care of by the callers. Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 9df26b785dc7..d5f35f15af98 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -304,10 +304,8 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru err = rpciod_up(); if (err) goto out_no_rpciod; - err = -EINVAL; - if (!xprt) - goto out_no_xprt; + err = -EINVAL; if (args->version >= program->nrvers) goto out_err; version = program->version[args->version]; @@ -382,10 +380,9 @@ out_no_principal: out_no_stats: kfree(clnt); out_err: - xprt_put(xprt); -out_no_xprt: rpciod_down(); out_no_rpciod: + xprt_put(xprt); return ERR_PTR(err); } From f19799f49d1a03f39d5e53fa1bf05db4820c86c2 Mon Sep 17 00:00:00 2001 From: "wenxiong@linux.vnet.ibm.com" Date: Wed, 27 Feb 2013 12:37:45 -0600 Subject: [PATCH 071/303] [SCSI] ipr: ipr_test_msi() fails when running with msi-x enabled adapter Loading ipr modules failed(-22) with msi-x enabled adapter. In ipr_test_msi(), We need to pass the first vector of msix vectors instead of using pdev->irq to request_irq() when adapter enables msix feature. Signed-off-by: Wen Xiong Acked-by: Brian King Signed-off-by: James Bottomley --- drivers/scsi/ipr.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index f328089a1060..fadf1528db3d 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -9349,7 +9349,10 @@ static int ipr_test_msi(struct ipr_ioa_cfg *ioa_cfg, struct pci_dev *pdev) int_reg = readl(ioa_cfg->regs.sense_interrupt_mask_reg); spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); - rc = request_irq(pdev->irq, ipr_test_intr, 0, IPR_NAME, ioa_cfg); + if (ioa_cfg->intr_flag == IPR_USE_MSIX) + rc = request_irq(ioa_cfg->vectors_info[0].vec, ipr_test_intr, 0, IPR_NAME, ioa_cfg); + else + rc = request_irq(pdev->irq, ipr_test_intr, 0, IPR_NAME, ioa_cfg); if (rc) { dev_err(&pdev->dev, "Can not assign irq %d\n", pdev->irq); return rc; @@ -9371,7 +9374,10 @@ static int ipr_test_msi(struct ipr_ioa_cfg *ioa_cfg, struct pci_dev *pdev) spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); - free_irq(pdev->irq, ioa_cfg); + if (ioa_cfg->intr_flag == IPR_USE_MSIX) + free_irq(ioa_cfg->vectors_info[0].vec, ioa_cfg); + else + free_irq(pdev->irq, ioa_cfg); LEAVE; From 95c9f4d4da6c4e445a9dd58c8382356520ea91a0 Mon Sep 17 00:00:00 2001 From: John Gong Date: Wed, 6 Mar 2013 10:43:03 +0800 Subject: [PATCH 072/303] [SCSI] libsas: use right function to alloc smp response In fact the disc_resp buffer will be overwrite by smp response, so we never found this typo, correct it by using the right one. Signed-off-by: John Gong Signed-off-by: Jack Wang Signed-off-by: James Bottomley --- drivers/scsi/libsas/sas_expander.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index aec2e0da5016..588affd6fde7 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -388,7 +388,7 @@ int sas_ex_phy_discover(struct domain_device *dev, int single) if (!disc_req) return -ENOMEM; - disc_resp = alloc_smp_req(DISCOVER_RESP_SIZE); + disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE); if (!disc_resp) { kfree(disc_req); return -ENOMEM; From 2b5bebccd282f2527d17ac90cf3ad0d486dd89d8 Mon Sep 17 00:00:00 2001 From: Joe Lawrence Date: Tue, 5 Mar 2013 10:57:56 -0500 Subject: [PATCH 073/303] [SCSI] st: Take additional queue ref in st_probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes a reference count bug in the SCSI tape driver which can be reproduced with the following: * Boot with slub_debug=FZPU, tape drive attached * echo 1 > /sys/devices/... tape device pci path .../remove * Wait for device removal * echo 1 > /sys/kernel/slab/blkdev_queue/validate * Slub debug complains about corrupted poison pattern In commit 523e1d39 (block: make gendisk hold a reference to its queue) add_disk() and disk_release() were modified to get/put an additional reference on a disk queue to fix a reference counting discrepency between bdev release and SCSI device removal. The ST driver never calls add_disk(), so this commit introduced an extra kref put when the ST driver frees its struct gendisk. Attempts were made to fix this bug at the block level [1] but later abandoned due to floppy driver issues [2]. [1] https://lkml.org/lkml/2012/8/27/354 [2] https://lkml.org/lkml/2012/9/22/113 Signed-off-by: Joe Lawrence Tested-by: Ewan D. Milne Acked-by: Kai Mäkisara Signed-off-by: James Bottomley --- drivers/scsi/st.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 86974471af68..2a32036a9404 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -4112,6 +4112,10 @@ static int st_probe(struct device *dev) tpnt->disk = disk; disk->private_data = &tpnt->driver; disk->queue = SDp->request_queue; + /* SCSI tape doesn't register this gendisk via add_disk(). Manually + * take queue reference that release_disk() expects. */ + if (!blk_get_queue(disk->queue)) + goto out_put_disk; tpnt->driver = &st_template; tpnt->device = SDp; @@ -4185,7 +4189,7 @@ static int st_probe(struct device *dev) idr_preload_end(); if (error < 0) { pr_warn("st: idr allocation failed: %d\n", error); - goto out_put_disk; + goto out_put_queue; } tpnt->index = error; sprintf(disk->disk_name, "st%d", tpnt->index); @@ -4211,6 +4215,8 @@ out_remove_devs: spin_lock(&st_index_lock); idr_remove(&st_index_idr, tpnt->index); spin_unlock(&st_index_lock); +out_put_queue: + blk_put_queue(disk->queue); out_put_disk: put_disk(disk); kfree(tpnt); From c4ee22a3708d54d8fb5ee8a23ca08e74abcaedfd Mon Sep 17 00:00:00 2001 From: Kleber Sacilotto de Souza Date: Thu, 14 Mar 2013 13:52:23 -0500 Subject: [PATCH 074/303] [SCSI] ipr: fix addition of abort command to HRRQ free queue The abort command issued by ipr_cancel_op() is being added to the wrong HRRQ free queue after the command returns. Fix it by using the HRRQ pointer in the ipr command struct itself. Signed-off-by: Kleber Sacilotto de Souza Signed-off-by: Wen Xiong Acked-by: Brian King Signed-off-by: James Bottomley --- drivers/scsi/ipr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index fadf1528db3d..21e0a0ae43a8 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -5148,7 +5148,7 @@ static int ipr_cancel_op(struct scsi_cmnd *scsi_cmd) ipr_trace; } - list_add_tail(&ipr_cmd->queue, &hrrq->hrrq_free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); if (!ipr_is_naca_model(res)) res->needs_sync_complete = 1; From 9077a944bc3c8bea45a91dc2caa65e96ac4c5f87 Mon Sep 17 00:00:00 2001 From: "wenxiong@linux.vnet.ibm.com" Date: Thu, 14 Mar 2013 13:52:24 -0500 Subject: [PATCH 075/303] [SCSI] ipr: dlpar failed when adding an adapter back Reinitialize resource queue prior to freeing resource entries to ensure they are not referenced. This fixes an issue with target_destoy accessing memory after it was freed. Signed-off-by: Wen Xiong Acked-by: Brian King Signed-off-by: James Bottomley --- drivers/scsi/ipr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 21e0a0ae43a8..2197b57fb225 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -9728,6 +9728,7 @@ static void __ipr_remove(struct pci_dev *pdev) spin_unlock_irqrestore(ioa_cfg->host->host_lock, host_lock_flags); wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload); flush_work(&ioa_cfg->work_q); + INIT_LIST_HEAD(&ioa_cfg->used_res_q); spin_lock_irqsave(ioa_cfg->host->host_lock, host_lock_flags); spin_lock(&ipr_driver_lock); From e9f4f418076f7d08a73667472b5f74a02965221d Mon Sep 17 00:00:00 2001 From: Joe Carnuccio Date: Mon, 25 Mar 2013 02:21:36 -0400 Subject: [PATCH 076/303] [SCSI] Revert "qla2xxx: Add setting of driver version string for vendor application." The original patch was not covering all the adapters and firmwares. This commit reverts 3a11711ad00caebee07e262d188cea66f3473c38. Signed-off-by: Joe Carnuccio Signed-off-by: Saurav Kashyap Signed-off-by: James Bottomley --- drivers/scsi/qla2xxx/qla_dbg.c | 1 + drivers/scsi/qla2xxx/qla_def.h | 1 - drivers/scsi/qla2xxx/qla_gbl.h | 3 -- drivers/scsi/qla2xxx/qla_init.c | 2 -- drivers/scsi/qla2xxx/qla_mbx.c | 58 --------------------------------- 5 files changed, 1 insertion(+), 64 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index 1626de52e32a..2bf02147bcce 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -15,6 +15,7 @@ * | Mailbox commands | 0x115b | 0x111a-0x111b | * | | | 0x112c-0x112e | * | | | 0x113a | + * | | | 0x1155-0x1158 | * | Device Discovery | 0x2087 | 0x2020-0x2022, | * | | | 0x2016 | * | Queue Command and IO tracing | 0x3031 | 0x3006-0x300b | diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index c6509911772b..65c5ff75936b 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -863,7 +863,6 @@ typedef struct { #define MBX_1 BIT_1 #define MBX_0 BIT_0 -#define RNID_TYPE_SET_VERSION 0x9 #define RNID_TYPE_ASIC_TEMP 0xC /* diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index eb3ca21a7f17..b310fa97b545 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -357,9 +357,6 @@ qla2x00_enable_fce_trace(scsi_qla_host_t *, dma_addr_t, uint16_t , uint16_t *, extern int qla2x00_disable_fce_trace(scsi_qla_host_t *, uint64_t *, uint64_t *); -extern int -qla2x00_set_driver_version(scsi_qla_host_t *, char *); - extern int qla2x00_read_sfp(scsi_qla_host_t *, dma_addr_t, uint8_t *, uint16_t, uint16_t, uint16_t, uint16_t); diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index edf4d14a1335..09eae54387fa 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -619,8 +619,6 @@ qla2x00_initialize_adapter(scsi_qla_host_t *vha) if (IS_QLA24XX_TYPE(ha) || IS_QLA25XX(ha)) qla24xx_read_fcp_prio_cfg(vha); - qla2x00_set_driver_version(vha, QLA2XXX_VERSION); - return (rval); } diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 186dd59ce4fa..43345af56431 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -3866,64 +3866,6 @@ qla81xx_restart_mpi_firmware(scsi_qla_host_t *vha) return rval; } -int -qla2x00_set_driver_version(scsi_qla_host_t *vha, char *version) -{ - int rval; - mbx_cmd_t mc; - mbx_cmd_t *mcp = &mc; - int len; - uint16_t dwlen; - uint8_t *str; - dma_addr_t str_dma; - struct qla_hw_data *ha = vha->hw; - - if (!IS_FWI2_CAPABLE(ha) || IS_QLA82XX(ha)) - return QLA_FUNCTION_FAILED; - - ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1155, - "Entered %s.\n", __func__); - - str = dma_pool_alloc(ha->s_dma_pool, GFP_KERNEL, &str_dma); - if (!str) { - ql_log(ql_log_warn, vha, 0x1156, - "Failed to allocate driver version param.\n"); - return QLA_MEMORY_ALLOC_FAILED; - } - - memcpy(str, "\x7\x3\x11\x0", 4); - dwlen = str[0]; - len = dwlen * sizeof(uint32_t) - 4; - memset(str + 4, 0, len); - if (len > strlen(version)) - len = strlen(version); - memcpy(str + 4, version, len); - - mcp->mb[0] = MBC_SET_RNID_PARAMS; - mcp->mb[1] = RNID_TYPE_SET_VERSION << 8 | dwlen; - mcp->mb[2] = MSW(LSD(str_dma)); - mcp->mb[3] = LSW(LSD(str_dma)); - mcp->mb[6] = MSW(MSD(str_dma)); - mcp->mb[7] = LSW(MSD(str_dma)); - mcp->out_mb = MBX_7|MBX_6|MBX_3|MBX_2|MBX_1|MBX_0; - mcp->in_mb = MBX_0; - mcp->tov = MBX_TOV_SECONDS; - mcp->flags = 0; - rval = qla2x00_mailbox_command(vha, mcp); - - if (rval != QLA_SUCCESS) { - ql_dbg(ql_dbg_mbx, vha, 0x1157, - "Failed=%x mb[0]=%x.\n", rval, mcp->mb[0]); - } else { - ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1158, - "Done %s.\n", __func__); - } - - dma_pool_free(ha->s_dma_pool, str, str_dma); - - return rval; -} - static int qla2x00_read_asic_temperature(scsi_qla_host_t *vha, uint16_t *temp) { From 00876ae85b47cdfe76aa08b1fffb27fd67425f53 Mon Sep 17 00:00:00 2001 From: Arun Easi Date: Mon, 25 Mar 2013 02:21:37 -0400 Subject: [PATCH 077/303] [SCSI] qla2xxx: Fix crash during firmware dump procedure. System crashes, in initiator mode operation, with qla2xxx_copy_atioqueues() in stack trace when firmware dump is attempted. Check for atio_q_length alone does not indicate if atio_ring is allocated, make explicit check of atio_ring to avoid the crash. Applicable to ISP24xx, ISP25xx, ISP81xx & ISP83xx line of HBAs. Signed-off-by: Arun Easi Signed-off-by: Saurav Kashyap Signed-off-by: James Bottomley --- drivers/scsi/qla2xxx/qla_dbg.c | 2 +- drivers/scsi/qla2xxx/qla_init.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index 2bf02147bcce..fbc305f1c87c 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -402,7 +402,7 @@ qla2xxx_copy_atioqueues(struct qla_hw_data *ha, void *ptr, void *ring; } aq, *aqp; - if (!ha->tgt.atio_q_length) + if (!ha->tgt.atio_ring) return ptr; num_queues = 1; diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 09eae54387fa..b59203393cb2 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -1397,7 +1397,7 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t *vha) mq_size += ha->max_rsp_queues * (rsp->length * sizeof(response_t)); } - if (ha->tgt.atio_q_length) + if (ha->tgt.atio_ring) mq_size += ha->tgt.atio_q_length * sizeof(request_t); /* Allocate memory for Fibre Channel Event Buffer. */ if (!IS_QLA25XX(ha) && !IS_QLA81XX(ha) && !IS_QLA83XX(ha)) From b03808fdbf98e41391efb732108a6d2fb4e30a76 Mon Sep 17 00:00:00 2001 From: Saurav Kashyap Date: Mon, 25 Mar 2013 02:21:38 -0400 Subject: [PATCH 078/303] [SCSI] qla2xxx: Update copyright dates information in LICENSE.qla2xxx file. Signed-off-by: Giridhar Malavali Signed-off-by: Saurav Kashyap Signed-off-by: James Bottomley --- Documentation/scsi/LICENSE.qla2xxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/scsi/LICENSE.qla2xxx b/Documentation/scsi/LICENSE.qla2xxx index 27a91cf43d6d..5020b7b5a244 100644 --- a/Documentation/scsi/LICENSE.qla2xxx +++ b/Documentation/scsi/LICENSE.qla2xxx @@ -1,4 +1,4 @@ -Copyright (c) 2003-2012 QLogic Corporation +Copyright (c) 2003-2013 QLogic Corporation QLogic Linux FC-FCoE Driver This program includes a device driver for Linux 3.x. From 4a9fa41bd404b8a572e4af7d2221e423bb729e52 Mon Sep 17 00:00:00 2001 From: Saurav Kashyap Date: Mon, 25 Mar 2013 02:21:39 -0400 Subject: [PATCH 079/303] [SCSI] qla2xxx: Remove debug code that msleeps for random duration. Signed-off-by: Giridhar Malavali Signed-off-by: Saurav Kashyap Signed-off-by: James Bottomley --- drivers/scsi/qla2xxx/qla_attr.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 1d82eef4e1eb..b3db9dcc2619 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -1938,11 +1938,6 @@ qla24xx_vport_delete(struct fc_vport *fc_vport) "Timer for the VP[%d] has stopped\n", vha->vp_idx); } - /* No pending activities shall be there on the vha now */ - if (ql2xextended_error_logging & ql_dbg_user) - msleep(random32()%10); /* Just to see if something falls on - * the net we have placed below */ - BUG_ON(atomic_read(&vha->vref_count)); qla2x00_free_fcports(vha); From 231ff54e4cc4a6f1ef78fb4e1f94957bbb961aae Mon Sep 17 00:00:00 2001 From: Saurav Kashyap Date: Mon, 25 Mar 2013 02:21:40 -0400 Subject: [PATCH 080/303] [SCSI] qla2xxx: Update the driver version to 8.04.00.13-k. Signed-off-by: Giridhar Malavali Signed-off-by: Saurav Kashyap Signed-off-by: James Bottomley --- drivers/scsi/qla2xxx/qla_version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_version.h b/drivers/scsi/qla2xxx/qla_version.h index 2b6e478d9e33..ec54036d1e12 100644 --- a/drivers/scsi/qla2xxx/qla_version.h +++ b/drivers/scsi/qla2xxx/qla_version.h @@ -7,7 +7,7 @@ /* * Driver version */ -#define QLA2XXX_VERSION "8.04.00.08-k" +#define QLA2XXX_VERSION "8.04.00.13-k" #define QLA_DRIVER_MAJOR_VER 8 #define QLA_DRIVER_MINOR_VER 4 From 9d85b590057523c2bfe3bea2f62ba9e2701a62db Mon Sep 17 00:00:00 2001 From: Brian King Date: Mon, 1 Apr 2013 09:44:26 -0500 Subject: [PATCH 081/303] [SCSI] ibmvscsi: Fix slave_configure deadlock No locks should be held when calling scsi_adjust_queue_depth so drop the lock in slave_configure prior to calling it. Signed-off-by: Brian King Acked-by: Robert Jennings Signed-off-by: James Bottomley --- drivers/scsi/ibmvscsi/ibmvscsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index a044f593e8b9..d0fa4b6c551f 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -1899,8 +1899,8 @@ static int ibmvscsi_slave_configure(struct scsi_device *sdev) sdev->allow_restart = 1; blk_queue_rq_timeout(sdev->request_queue, 120 * HZ); } - scsi_adjust_queue_depth(sdev, 0, shost->cmd_per_lun); spin_unlock_irqrestore(shost->host_lock, lock_flags); + scsi_adjust_queue_depth(sdev, 0, shost->cmd_per_lun); return 0; } From d4a2618fa77b5e58ec15342972bd3505a1c3f551 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Wed, 3 Apr 2013 10:27:17 +0200 Subject: [PATCH 082/303] [SCSI] libsas: fix handling vacant phy in sas_set_ex_phy() If a result of the SMP discover function is PHY VACANT, the content of discover response structure (dr) is not valid. It sometimes happens that dr->attached_sas_addr can contain even SAS address of other phy. In such case an invalid phy is created, what causes NULL pointer dereference during destruction of expander's phys. So if a result of SMP function is PHY VACANT, the content of discover response structure (dr) must not be copied to phy structure. This patch fixes the following bug: BUG: unable to handle kernel NULL pointer dereference at 0000000000000030 IP: [] sysfs_find_dirent+0x12/0x90 Call Trace: [] sysfs_get_dirent+0x35/0x80 [] sysfs_unmerge_group+0x1e/0xb0 [] dpm_sysfs_remove+0x24/0x90 [] device_del+0x44/0x1d0 [] sas_rphy_delete+0x9/0x20 [scsi_transport_sas] [] sas_destruct_devices+0xe6/0x110 [libsas] [] process_one_work+0x16c/0x350 [] worker_thread+0x17a/0x410 [] kthread+0x96/0xa0 [] kernel_thread_helper+0x4/0x10 Signed-off-by: Lukasz Dorau Signed-off-by: Pawel Baldysiak Reviewed-by: Maciej Patelczyk Cc: Signed-off-by: James Bottomley --- drivers/scsi/libsas/sas_expander.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 588affd6fde7..55cbd0180159 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -235,6 +235,17 @@ static void sas_set_ex_phy(struct domain_device *dev, int phy_id, void *rsp) linkrate = phy->linkrate; memcpy(sas_addr, phy->attached_sas_addr, SAS_ADDR_SIZE); + /* Handle vacant phy - rest of dr data is not valid so skip it */ + if (phy->phy_state == PHY_VACANT) { + memset(phy->attached_sas_addr, 0, SAS_ADDR_SIZE); + phy->attached_dev_type = NO_DEVICE; + if (!test_bit(SAS_HA_ATA_EH_ACTIVE, &ha->state)) { + phy->phy_id = phy_id; + goto skip; + } else + goto out; + } + phy->attached_dev_type = to_dev_type(dr); if (test_bit(SAS_HA_ATA_EH_ACTIVE, &ha->state)) goto out; @@ -272,6 +283,7 @@ static void sas_set_ex_phy(struct domain_device *dev, int phy_id, void *rsp) phy->phy->maximum_linkrate = dr->pmax_linkrate; phy->phy->negotiated_linkrate = phy->linkrate; + skip: if (new_phy) if (sas_phy_add(phy->phy)) { sas_phy_free(phy->phy); From 3a7b21eaf4fb3c971bdb47a98f570550ddfe4471 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 5 Apr 2013 08:13:30 +0000 Subject: [PATCH 083/303] netfilter: nf_ct_sip: don't drop packets with offsets pointing outside the packet Some Cisco phones create huge messages that are spread over multiple packets. After calculating the offset of the SIP body, it is validated to be within the packet and the packet is dropped otherwise. This breaks operation of these phones. Since connection tracking is supposed to be passive, just let those packets pass unmodified and untracked. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_sip.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 0e7d423324c3..e0c4373b4747 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1593,10 +1593,8 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, end += strlen("\r\n\r\n") + clen; msglen = origlen = end - dptr; - if (msglen > datalen) { - nf_ct_helper_log(skb, ct, "incomplete/bad SIP message"); - return NF_DROP; - } + if (msglen > datalen) + return NF_ACCEPT; ret = process_sip_msg(skb, ct, protoff, dataoff, &dptr, &msglen); From 889d66848b12d891248b03abcb2a42047f8e172a Mon Sep 17 00:00:00 2001 From: Eldad Zack Date: Fri, 5 Apr 2013 20:49:46 +0200 Subject: [PATCH 084/303] ALSA: usb-audio: fix endianness bug in snd_nativeinstruments_* The usb_control_msg() function expects __u16 types and performs the endianness conversions by itself. However, in three places, a conversion is performed before it is handed over to usb_control_msg(), which leads to a double conversion (= no conversion): * snd_usb_nativeinstruments_boot_quirk() * snd_nativeinstruments_control_get() * snd_nativeinstruments_control_put() Caught by sparse: sound/usb/mixer_quirks.c:512:38: warning: incorrect type in argument 6 (different base types) sound/usb/mixer_quirks.c:512:38: expected unsigned short [unsigned] [usertype] index sound/usb/mixer_quirks.c:512:38: got restricted __le16 [usertype] sound/usb/mixer_quirks.c:543:35: warning: incorrect type in argument 5 (different base types) sound/usb/mixer_quirks.c:543:35: expected unsigned short [unsigned] [usertype] value sound/usb/mixer_quirks.c:543:35: got restricted __le16 [usertype] sound/usb/mixer_quirks.c:543:56: warning: incorrect type in argument 6 (different base types) sound/usb/mixer_quirks.c:543:56: expected unsigned short [unsigned] [usertype] index sound/usb/mixer_quirks.c:543:56: got restricted __le16 [usertype] sound/usb/quirks.c:502:35: warning: incorrect type in argument 5 (different base types) sound/usb/quirks.c:502:35: expected unsigned short [unsigned] [usertype] value sound/usb/quirks.c:502:35: got restricted __le16 [usertype] Signed-off-by: Eldad Zack Acked-by: Daniel Mack Cc: Signed-off-by: Takashi Iwai --- sound/usb/mixer_quirks.c | 4 ++-- sound/usb/quirks.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index 497d2741d119..ebe91440a068 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -509,7 +509,7 @@ static int snd_nativeinstruments_control_get(struct snd_kcontrol *kcontrol, else ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), bRequest, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, - 0, cpu_to_le16(wIndex), + 0, wIndex, &tmp, sizeof(tmp), 1000); up_read(&mixer->chip->shutdown_rwsem); @@ -540,7 +540,7 @@ static int snd_nativeinstruments_control_put(struct snd_kcontrol *kcontrol, else ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), bRequest, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, - cpu_to_le16(wValue), cpu_to_le16(wIndex), + wValue, wIndex, NULL, 0, 1000); up_read(&mixer->chip->shutdown_rwsem); diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 5325a3869bb7..9c5ab22358b1 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -486,7 +486,7 @@ static int snd_usb_nativeinstruments_boot_quirk(struct usb_device *dev) { int ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), 0xaf, USB_TYPE_VENDOR | USB_RECIP_DEVICE, - cpu_to_le16(1), 0, NULL, 0, 1000); + 1, 0, NULL, 0, 1000); if (ret < 0) return ret; From 8f964525a121f2ff2df948dac908dcc65be21b5b Mon Sep 17 00:00:00 2001 From: Andrew Honig Date: Fri, 29 Mar 2013 09:35:21 -0700 Subject: [PATCH 085/303] KVM: Allow cross page reads and writes from cached translations. This patch adds support for kvm_gfn_to_hva_cache_init functions for reads and writes that will cross a page. If the range falls within the same memslot, then this will be a fast operation. If the range is split between two memslots, then the slower kvm_read_guest and kvm_write_guest are used. Tested: Test against kvm_clock unit tests. Signed-off-by: Andrew Honig Signed-off-by: Gleb Natapov --- arch/x86/kvm/lapic.c | 2 +- arch/x86/kvm/x86.c | 13 +++++------ include/linux/kvm_host.h | 2 +- include/linux/kvm_types.h | 1 + virt/kvm/kvm_main.c | 47 ++++++++++++++++++++++++++++++--------- 5 files changed, 46 insertions(+), 19 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 02b51dd4e4ad..f77df1c5de6e 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1857,7 +1857,7 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data) if (!pv_eoi_enabled(vcpu)) return 0; return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data, - addr); + addr, sizeof(u8)); } void kvm_lapic_init(void) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f19ac0aca60d..e1721324c271 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1823,7 +1823,8 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) return 0; } - if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa)) + if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa, + sizeof(u32))) return 1; vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS); @@ -1952,12 +1953,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) gpa_offset = data & ~(PAGE_MASK | 1); - /* Check that the address is 32-byte aligned. */ - if (gpa_offset & (sizeof(struct pvclock_vcpu_time_info) - 1)) - break; - if (kvm_gfn_to_hva_cache_init(vcpu->kvm, - &vcpu->arch.pv_time, data & ~1ULL)) + &vcpu->arch.pv_time, data & ~1ULL, + sizeof(struct pvclock_vcpu_time_info))) vcpu->arch.pv_time_enabled = false; else vcpu->arch.pv_time_enabled = true; @@ -1977,7 +1975,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime, - data & KVM_STEAL_VALID_BITS)) + data & KVM_STEAL_VALID_BITS, + sizeof(struct kvm_steal_time))) return 1; vcpu->arch.st.msr_val = data; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index cad77fe09d77..c13958251927 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -518,7 +518,7 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, void *data, unsigned long len); int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, - gpa_t gpa); + gpa_t gpa, unsigned long len); int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index fa7cc7244cbd..b0bcce0ddc95 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -71,6 +71,7 @@ struct gfn_to_hva_cache { u64 generation; gpa_t gpa; unsigned long hva; + unsigned long len; struct kvm_memory_slot *memslot; }; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index adc68feb5c5a..f18013f09e68 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1541,21 +1541,38 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, } int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, - gpa_t gpa) + gpa_t gpa, unsigned long len) { struct kvm_memslots *slots = kvm_memslots(kvm); int offset = offset_in_page(gpa); - gfn_t gfn = gpa >> PAGE_SHIFT; + gfn_t start_gfn = gpa >> PAGE_SHIFT; + gfn_t end_gfn = (gpa + len - 1) >> PAGE_SHIFT; + gfn_t nr_pages_needed = end_gfn - start_gfn + 1; + gfn_t nr_pages_avail; ghc->gpa = gpa; ghc->generation = slots->generation; - ghc->memslot = gfn_to_memslot(kvm, gfn); - ghc->hva = gfn_to_hva_many(ghc->memslot, gfn, NULL); - if (!kvm_is_error_hva(ghc->hva)) + ghc->len = len; + ghc->memslot = gfn_to_memslot(kvm, start_gfn); + ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, &nr_pages_avail); + if (!kvm_is_error_hva(ghc->hva) && nr_pages_avail >= nr_pages_needed) { ghc->hva += offset; - else - return -EFAULT; - + } else { + /* + * If the requested region crosses two memslots, we still + * verify that the entire region is valid here. + */ + while (start_gfn <= end_gfn) { + ghc->memslot = gfn_to_memslot(kvm, start_gfn); + ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, + &nr_pages_avail); + if (kvm_is_error_hva(ghc->hva)) + return -EFAULT; + start_gfn += nr_pages_avail; + } + /* Use the slow path for cross page reads and writes. */ + ghc->memslot = NULL; + } return 0; } EXPORT_SYMBOL_GPL(kvm_gfn_to_hva_cache_init); @@ -1566,8 +1583,13 @@ int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, struct kvm_memslots *slots = kvm_memslots(kvm); int r; + BUG_ON(len > ghc->len); + if (slots->generation != ghc->generation) - kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa); + kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa, ghc->len); + + if (unlikely(!ghc->memslot)) + return kvm_write_guest(kvm, ghc->gpa, data, len); if (kvm_is_error_hva(ghc->hva)) return -EFAULT; @@ -1587,8 +1609,13 @@ int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, struct kvm_memslots *slots = kvm_memslots(kvm); int r; + BUG_ON(len > ghc->len); + if (slots->generation != ghc->generation) - kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa); + kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa, ghc->len); + + if (unlikely(!ghc->memslot)) + return kvm_read_guest(kvm, ghc->gpa, data, len); if (kvm_is_error_hva(ghc->hva)) return -EFAULT; From c7426bce5933d16b492a34e42ae77e26fceddff6 Mon Sep 17 00:00:00 2001 From: Robert Tivy Date: Thu, 28 Mar 2013 18:41:43 -0700 Subject: [PATCH 086/303] remoteproc: fix FW_CONFIG typo Fix obvious typo introduced in commit e121aefa7d9f10eee5cf26ed47129237a05d940b ("remoteproc: fix missing CONFIG_FW_LOADER configurations"). Cc: stable@vger.kernel.org Signed-off-by: Robert Tivy [cc stable, slight subject change] Signed-off-by: Ohad Ben-Cohen --- drivers/remoteproc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig index cc1f7bf53fd0..c6d77e20622c 100644 --- a/drivers/remoteproc/Kconfig +++ b/drivers/remoteproc/Kconfig @@ -4,7 +4,7 @@ menu "Remoteproc drivers" config REMOTEPROC tristate depends on HAS_DMA - select FW_CONFIG + select FW_LOADER select VIRTIO config OMAP_REMOTEPROC From aa8b4be3ac049c8b1df2a87e4d1d902ccfc1f7a9 Mon Sep 17 00:00:00 2001 From: Jay Estabrook Date: Sun, 7 Apr 2013 21:36:09 +1200 Subject: [PATCH 087/303] alpha: Add irongate_io to PCI bus resources Fixes a NULL pointer dereference at boot on UP1500. Cc: stable@vger.kernel.org Reviewed-and-Tested-by: Matt Turner Signed-off-by: Jay Estabrook Signed-off-by: Matt Turner Signed-off-by: Michael Cree Signed-off-by: Linus Torvalds --- arch/alpha/kernel/sys_nautilus.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/alpha/kernel/sys_nautilus.c b/arch/alpha/kernel/sys_nautilus.c index 4d4c046f708d..1383f8601a93 100644 --- a/arch/alpha/kernel/sys_nautilus.c +++ b/arch/alpha/kernel/sys_nautilus.c @@ -188,6 +188,10 @@ nautilus_machine_check(unsigned long vector, unsigned long la_ptr) extern void free_reserved_mem(void *, void *); extern void pcibios_claim_one_bus(struct pci_bus *); +static struct resource irongate_io = { + .name = "Irongate PCI IO", + .flags = IORESOURCE_IO, +}; static struct resource irongate_mem = { .name = "Irongate PCI MEM", .flags = IORESOURCE_MEM, @@ -209,6 +213,7 @@ nautilus_init_pci(void) irongate = pci_get_bus_and_slot(0, 0); bus->self = irongate; + bus->resource[0] = &irongate_io; bus->resource[1] = &irongate_mem; pci_bus_size_bridges(bus); From cd8d2331756751b6aeb855a3c9cb0a92fbd9c725 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Sun, 7 Apr 2013 21:36:10 +1200 Subject: [PATCH 088/303] alpha: makefile: don't enforce small data model for kernel builds Due to all of the goodness being packed into today's kernels, the resulting image isn't as slim as it once was. In light of this, don't pass -msmall-data to gcc, which otherwise results in link failures due to impossible relocations when compiling anything but the most trivial configurations. Cc: Richard Henderson Cc: Ivan Kokshaysky Reviewed-by: Matt Turner Tested-by: Thorsten Kranzkowski Signed-off-by: Will Deacon Signed-off-by: Michael Cree Signed-off-by: Linus Torvalds --- arch/alpha/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/alpha/Makefile b/arch/alpha/Makefile index 4759fe751aa1..2cc3cc519c54 100644 --- a/arch/alpha/Makefile +++ b/arch/alpha/Makefile @@ -12,7 +12,7 @@ NM := $(NM) -B LDFLAGS_vmlinux := -static -N #-relax CHECKFLAGS += -D__alpha__ -m64 -cflags-y := -pipe -mno-fp-regs -ffixed-8 -msmall-data +cflags-y := -pipe -mno-fp-regs -ffixed-8 cflags-y += $(call cc-option, -fno-jump-tables) cpuflags-$(CONFIG_ALPHA_EV4) := -mcpu=ev4 From e20800fd5cec2a75639a32e956b1cdc023cb87ce Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Sun, 7 Apr 2013 21:36:11 +1200 Subject: [PATCH 089/303] alpha: irq: run all handlers with interrupts disabled Linux has expected that interrupt handlers are executed with local interrupts disabled for a while now, so ensure that this is the case on Alpha even for non-device interrupts such as IPIs. Without this patch, secondary boot results in the following backtrace: warning: at kernel/softirq.c:139 __local_bh_enable+0xb8/0xd0() trace: __local_bh_enable+0xb8/0xd0 irq_enter+0x74/0xa0 scheduler_ipi+0x50/0x100 handle_ipi+0x84/0x260 do_entint+0x1ac/0x2e0 irq_exit+0x60/0xa0 handle_irq+0x98/0x100 do_entint+0x2c8/0x2e0 ret_from_sys_call+0x0/0x10 load_balance+0x3e4/0x870 cpu_idle+0x24/0x80 rcu_eqs_enter_common.isra.38+0x0/0x120 cpu_idle+0x40/0x80 rest_init+0xc0/0xe0 _stext+0x1c/0x20 A similar dump occurs if you try to reboot using magic-sysrq. Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Reviewed-by: Matt Turner Signed-off-by: Will Deacon Signed-off-by: Michael Cree Signed-off-by: Linus Torvalds --- arch/alpha/kernel/irq.c | 7 ------- arch/alpha/kernel/irq_alpha.c | 9 ++++++++- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c index 2872accd2215..7b2be251c30f 100644 --- a/arch/alpha/kernel/irq.c +++ b/arch/alpha/kernel/irq.c @@ -117,13 +117,6 @@ handle_irq(int irq) return; } - /* - * From here we must proceed with IPL_MAX. Note that we do not - * explicitly enable interrupts afterwards - some MILO PALcode - * (namely LX164 one) seems to have severe problems with RTI - * at IPL 0. - */ - local_irq_disable(); irq_enter(); generic_handle_irq_desc(irq, desc); irq_exit(); diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c index 772ddfdb71a8..1216dfb4fcc5 100644 --- a/arch/alpha/kernel/irq_alpha.c +++ b/arch/alpha/kernel/irq_alpha.c @@ -45,6 +45,14 @@ do_entInt(unsigned long type, unsigned long vector, unsigned long la_ptr, struct pt_regs *regs) { struct pt_regs *old_regs; + + /* + * Disable interrupts during IRQ handling. + * Note that there is no matching local_irq_enable() due to + * severe problems with RTI at IPL0 and some MILO PALcode + * (namely LX164). + */ + local_irq_disable(); switch (type) { case 0: #ifdef CONFIG_SMP @@ -62,7 +70,6 @@ do_entInt(unsigned long type, unsigned long vector, { long cpu; - local_irq_disable(); smp_percpu_timer_interrupt(regs); cpu = smp_processor_id(); if (cpu != boot_cpuid) { From e74e25929ce4d4f5f5f4bae585b214bacef6960b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Sun, 7 Apr 2013 21:36:12 +1200 Subject: [PATCH 090/303] alpha: irq: remove deprecated use of IRQF_DISABLED Interrupt handlers are always invoked with interrupts disabled, so remove all uses of the deprecated IRQF_DISABLED flag. Cc: Richard Henderson Cc: Ivan Kokshaysky Reviewed-by: Matt Turner Signed-off-by: Will Deacon Signed-off-by: Michael Cree Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/floppy.h | 2 +- arch/alpha/kernel/irq_alpha.c | 1 - arch/alpha/kernel/sys_titan.c | 14 +++++++------- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/arch/alpha/include/asm/floppy.h b/arch/alpha/include/asm/floppy.h index 46cefbd50e73..bae97eb19d26 100644 --- a/arch/alpha/include/asm/floppy.h +++ b/arch/alpha/include/asm/floppy.h @@ -26,7 +26,7 @@ #define fd_disable_irq() disable_irq(FLOPPY_IRQ) #define fd_cacheflush(addr,size) /* nothing */ #define fd_request_irq() request_irq(FLOPPY_IRQ, floppy_interrupt,\ - IRQF_DISABLED, "floppy", NULL) + 0, "floppy", NULL) #define fd_free_irq() free_irq(FLOPPY_IRQ, NULL) #ifdef CONFIG_PCI diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c index 1216dfb4fcc5..f433fc11877a 100644 --- a/arch/alpha/kernel/irq_alpha.c +++ b/arch/alpha/kernel/irq_alpha.c @@ -229,7 +229,6 @@ process_mcheck_info(unsigned long vector, unsigned long la_ptr, struct irqaction timer_irqaction = { .handler = timer_interrupt, - .flags = IRQF_DISABLED, .name = "timer", }; diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c index 5cf4a481b8c5..a53cf03f49d5 100644 --- a/arch/alpha/kernel/sys_titan.c +++ b/arch/alpha/kernel/sys_titan.c @@ -280,15 +280,15 @@ titan_late_init(void) * all reported to the kernel as machine checks, so the handler * is a nop so it can be called to count the individual events. */ - titan_request_irq(63+16, titan_intr_nop, IRQF_DISABLED, + titan_request_irq(63+16, titan_intr_nop, 0, "CChip Error", NULL); - titan_request_irq(62+16, titan_intr_nop, IRQF_DISABLED, + titan_request_irq(62+16, titan_intr_nop, 0, "PChip 0 H_Error", NULL); - titan_request_irq(61+16, titan_intr_nop, IRQF_DISABLED, + titan_request_irq(61+16, titan_intr_nop, 0, "PChip 1 H_Error", NULL); - titan_request_irq(60+16, titan_intr_nop, IRQF_DISABLED, + titan_request_irq(60+16, titan_intr_nop, 0, "PChip 0 C_Error", NULL); - titan_request_irq(59+16, titan_intr_nop, IRQF_DISABLED, + titan_request_irq(59+16, titan_intr_nop, 0, "PChip 1 C_Error", NULL); /* @@ -348,9 +348,9 @@ privateer_init_pci(void) * Hook a couple of extra err interrupts that the * common titan code won't. */ - titan_request_irq(53+16, titan_intr_nop, IRQF_DISABLED, + titan_request_irq(53+16, titan_intr_nop, 0, "NMI", NULL); - titan_request_irq(50+16, titan_intr_nop, IRQF_DISABLED, + titan_request_irq(50+16, titan_intr_nop, 0, "Temperature Warning", NULL); /* From 31880c37c11e28cb81c70757e38392b42e695dc6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 7 Apr 2013 20:49:54 -0700 Subject: [PATCH 091/303] Linux 3.9-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 58a165b02af1..6db672b15bda 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 9 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Unicycling Gorilla # *DOCUMENTATION* From e55b6a843b8a07b1c342f9155aa58d0274aef502 Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Sun, 7 Apr 2013 20:52:38 -0700 Subject: [PATCH 092/303] Input: wacom - fix "can not retrieve extra class descriptor" for 24HDT The MFT device in the Cintiq 24HDT has two interfaces sharing the same configuration. Without this patch, the driver attempts to make use of both interfaces, even though the second interface is not compatible with this driver. Signed-off-by: Jason Gerecke Tested-by: Ping Cheng Signed-off-by: Dmitry Torokhov --- drivers/input/tablet/wacom_wac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/tablet/wacom_wac.c b/drivers/input/tablet/wacom_wac.c index 1daa97913b7d..ed78be901b65 100644 --- a/drivers/input/tablet/wacom_wac.c +++ b/drivers/input/tablet/wacom_wac.c @@ -2209,7 +2209,7 @@ const struct usb_device_id wacom_ids[] = { { USB_DEVICE_WACOM(0x47) }, { USB_DEVICE_WACOM(0xF4) }, { USB_DEVICE_WACOM(0xF8) }, - { USB_DEVICE_WACOM(0xF6) }, + { USB_DEVICE_DETAILED(0xF6, USB_CLASS_HID, 0, 0) }, { USB_DEVICE_WACOM(0xFA) }, { USB_DEVICE_LENOVO(0x6004) }, { } From 9fb2640159f9d4f5a2a9d60e490482d4cbecafdb Mon Sep 17 00:00:00 2001 From: Michael Wolf Date: Fri, 5 Apr 2013 10:41:40 +0000 Subject: [PATCH 093/303] powerpc: pSeries_lpar_hpte_remove fails from Adjunct partition being performed before the ANDCOND test Some versions of pHyp will perform the adjunct partition test before the ANDCOND test. The result of this is that H_RESOURCE can be returned and cause the BUG_ON condition to occur. The HPTE is not removed. So add a check for H_RESOURCE, it is ok if this HPTE is not removed as pSeries_lpar_hpte_remove is looking for an HPTE to remove and not a specific HPTE to remove. So it is ok to just move on to the next slot and try again. Cc: stable@vger.kernel.org Signed-off-by: Michael Wolf Signed-off-by: Stephen Rothwell --- arch/powerpc/platforms/pseries/lpar.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 0da39fed355a..299731e9036b 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -186,7 +186,13 @@ static long pSeries_lpar_hpte_remove(unsigned long hpte_group) (0x1UL << 4), &dummy1, &dummy2); if (lpar_rc == H_SUCCESS) return i; - BUG_ON(lpar_rc != H_NOT_FOUND); + + /* + * The test for adjunct partition is performed before the + * ANDCOND test. H_RESOURCE may be returned, so we need to + * check for that as well. + */ + BUG_ON(lpar_rc != H_NOT_FOUND && lpar_rc != H_RESOURCE); slot_offset++; slot_offset &= 0x7; From c2fccc1c9f7c81700cbac2120a4ad5441dd37004 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 8 Apr 2013 10:12:11 +0200 Subject: [PATCH 094/303] Revert "loop: cleanup partitions when detaching loop device" This reverts commit 8761a3dc1f07b163414e2215a2cadbb4cfe2a107. There are situations where the destruction path is called with the bdev->bd_mutex already held, which then deadlocks in loop_clr_fd(). The normal partition cleanup does a trylock() on the mutex, but it'd be nice to have a more bullet proof method in loop. So punt this more involved fix to the next merge window, and just back out this buggy fix for now. Signed-off-by: Jens Axboe --- block/partition-generic.c | 1 - drivers/block/loop.c | 21 ++------------------- 2 files changed, 2 insertions(+), 20 deletions(-) diff --git a/block/partition-generic.c b/block/partition-generic.c index ae95ee6a58aa..789cdea05893 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -257,7 +257,6 @@ void delete_partition(struct gendisk *disk, int partno) hd_struct_put(part); } -EXPORT_SYMBOL(delete_partition); static ssize_t whole_disk_show(struct device *dev, struct device_attribute *attr, char *buf) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index fe5f6403417f..ee13a82f3f5e 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1044,29 +1044,12 @@ static int loop_clr_fd(struct loop_device *lo) lo->lo_state = Lo_unbound; /* This is safe: open() is still holding a reference. */ module_put(THIS_MODULE); + if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev) + ioctl_by_bdev(bdev, BLKRRPART, 0); lo->lo_flags = 0; if (!part_shift) lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN; mutex_unlock(&lo->lo_ctl_mutex); - - /* - * Remove all partitions, since BLKRRPART won't remove user - * added partitions when max_part=0 - */ - if (bdev) { - struct disk_part_iter piter; - struct hd_struct *part; - - mutex_lock_nested(&bdev->bd_mutex, 1); - invalidate_partition(bdev->bd_disk, 0); - disk_part_iter_init(&piter, bdev->bd_disk, - DISK_PITER_INCL_EMPTY); - while ((part = disk_part_iter_next(&piter))) - delete_partition(bdev->bd_disk, part->partno); - disk_part_iter_exit(&piter); - mutex_unlock(&bdev->bd_mutex); - } - /* * Need not hold lo_ctl_mutex to fput backing file. * Calling fput holding lo_ctl_mutex triggers a circular From a1cbcaa9ea87b87a96b9fc465951dcf36e459ca2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 6 Apr 2013 10:10:27 +0200 Subject: [PATCH 095/303] sched_clock: Prevent 64bit inatomicity on 32bit systems The sched_clock_remote() implementation has the following inatomicity problem on 32bit systems when accessing the remote scd->clock, which is a 64bit value. CPU0 CPU1 sched_clock_local() sched_clock_remote(CPU0) ... remote_clock = scd[CPU0]->clock read_low32bit(scd[CPU0]->clock) cmpxchg64(scd->clock,...) read_high32bit(scd[CPU0]->clock) While the update of scd->clock is using an atomic64 mechanism, the readout on the remote cpu is not, which can cause completely bogus readouts. It is a quite rare problem, because it requires the update to hit the narrow race window between the low/high readout and the update must go across the 32bit boundary. The resulting misbehaviour is, that CPU1 will see the sched_clock on CPU1 ~4 seconds ahead of it's own and update CPU1s sched_clock value to this bogus timestamp. This stays that way due to the clamping implementation for about 4 seconds until the synchronization with CLOCK_MONOTONIC undoes the problem. The issue is hard to observe, because it might only result in a less accurate SCHED_OTHER timeslicing behaviour. To create observable damage on realtime scheduling classes, it is necessary that the bogus update of CPU1 sched_clock happens in the context of an realtime thread, which then gets charged 4 seconds of RT runtime, which results in the RT throttler mechanism to trigger and prevent scheduling of RT tasks for a little less than 4 seconds. So this is quite unlikely as well. The issue was quite hard to decode as the reproduction time is between 2 days and 3 weeks and intrusive tracing makes it less likely, but the following trace recorded with trace_clock=global, which uses sched_clock_local(), gave the final hint: -0 0d..30 400269.477150: hrtimer_cancel: hrtimer=0xf7061e80 -0 0d..30 400269.477151: hrtimer_start: hrtimer=0xf7061e80 ... irq/20-S-587 1d..32 400273.772118: sched_wakeup: comm= ... target_cpu=0 -0 0dN.30 400273.772118: hrtimer_cancel: hrtimer=0xf7061e80 What happens is that CPU0 goes idle and invokes sched_clock_idle_sleep_event() which invokes sched_clock_local() and CPU1 runs a remote wakeup for CPU0 at the same time, which invokes sched_remote_clock(). The time jump gets propagated to CPU0 via sched_remote_clock() and stays stale on both cores for ~4 seconds. There are only two other possibilities, which could cause a stale sched clock: 1) ktime_get() which reads out CLOCK_MONOTONIC returns a sporadic wrong value. 2) sched_clock() which reads the TSC returns a sporadic wrong value. #1 can be excluded because sched_clock would continue to increase for one jiffy and then go stale. #2 can be excluded because it would not make the clock jump forward. It would just result in a stale sched_clock for one jiffy. After quite some brain twisting and finding the same pattern on other traces, sched_clock_remote() remained the only place which could cause such a problem and as explained above it's indeed racy on 32bit systems. So while on 64bit systems the readout is atomic, we need to verify the remote readout on 32bit machines. We need to protect the local->clock readout in sched_clock_remote() on 32bit as well because an NMI could hit between the low and the high readout, call sched_clock_local() and modify local->clock. Thanks to Siegfried Wulsch for bearing with my debug requests and going through the tedious tasks of running a bunch of reproducer systems to generate the debug information which let me decode the issue. Reported-by: Siegfried Wulsch Acked-by: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/alpine.LFD.2.02.1304051544160.21884@ionos Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org --- kernel/sched/clock.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index c685e31492df..c3ae1446461c 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -176,10 +176,36 @@ static u64 sched_clock_remote(struct sched_clock_data *scd) u64 this_clock, remote_clock; u64 *ptr, old_val, val; +#if BITS_PER_LONG != 64 +again: + /* + * Careful here: The local and the remote clock values need to + * be read out atomic as we need to compare the values and + * then update either the local or the remote side. So the + * cmpxchg64 below only protects one readout. + * + * We must reread via sched_clock_local() in the retry case on + * 32bit as an NMI could use sched_clock_local() via the + * tracer and hit between the readout of + * the low32bit and the high 32bit portion. + */ + this_clock = sched_clock_local(my_scd); + /* + * We must enforce atomic readout on 32bit, otherwise the + * update on the remote cpu can hit inbetween the readout of + * the low32bit and the high 32bit portion. + */ + remote_clock = cmpxchg64(&scd->clock, 0, 0); +#else + /* + * On 64bit the read of [my]scd->clock is atomic versus the + * update, so we can avoid the above 32bit dance. + */ sched_clock_local(my_scd); again: this_clock = my_scd->clock; remote_clock = scd->clock; +#endif /* * Use the opportunity that we have both locks From b6c7aabd923a17af993c5a5d5d7995f0b27c000a Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 8 Apr 2013 11:44:57 +0100 Subject: [PATCH 096/303] ARM: Do 15e0d9e37c (ARM: pm: let platforms select cpu_suspend support) properly Let's do the changes properly and fix the same problem everywhere, not just for one case. Cc: # kernels containing 15e0d9e37c or equivalent Signed-off-by: Russell King --- arch/arm/mm/proc-arm920.S | 2 +- arch/arm/mm/proc-arm926.S | 2 +- arch/arm/mm/proc-mohawk.S | 2 +- arch/arm/mm/proc-sa1100.S | 2 +- arch/arm/mm/proc-v6.S | 2 +- arch/arm/mm/proc-xsc3.S | 2 +- arch/arm/mm/proc-xscale.S | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S index 2c3b9421ab5e..2556cf1c2da1 100644 --- a/arch/arm/mm/proc-arm920.S +++ b/arch/arm/mm/proc-arm920.S @@ -387,7 +387,7 @@ ENTRY(cpu_arm920_set_pte_ext) /* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */ .globl cpu_arm920_suspend_size .equ cpu_arm920_suspend_size, 4 * 3 -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_ARM_CPU_SUSPEND ENTRY(cpu_arm920_do_suspend) stmfd sp!, {r4 - r6, lr} mrc p15, 0, r4, c13, c0, 0 @ PID diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S index f1803f7e2972..344c8a548cc0 100644 --- a/arch/arm/mm/proc-arm926.S +++ b/arch/arm/mm/proc-arm926.S @@ -402,7 +402,7 @@ ENTRY(cpu_arm926_set_pte_ext) /* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */ .globl cpu_arm926_suspend_size .equ cpu_arm926_suspend_size, 4 * 3 -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_ARM_CPU_SUSPEND ENTRY(cpu_arm926_do_suspend) stmfd sp!, {r4 - r6, lr} mrc p15, 0, r4, c13, c0, 0 @ PID diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S index 82f9cdc751d6..0b60dd3d742a 100644 --- a/arch/arm/mm/proc-mohawk.S +++ b/arch/arm/mm/proc-mohawk.S @@ -350,7 +350,7 @@ ENTRY(cpu_mohawk_set_pte_ext) .globl cpu_mohawk_suspend_size .equ cpu_mohawk_suspend_size, 4 * 6 -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_ARM_CPU_SUSPEND ENTRY(cpu_mohawk_do_suspend) stmfd sp!, {r4 - r9, lr} mrc p14, 0, r4, c6, c0, 0 @ clock configuration, for turbo mode diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S index 3aa0da11fd84..d92dfd081429 100644 --- a/arch/arm/mm/proc-sa1100.S +++ b/arch/arm/mm/proc-sa1100.S @@ -172,7 +172,7 @@ ENTRY(cpu_sa1100_set_pte_ext) .globl cpu_sa1100_suspend_size .equ cpu_sa1100_suspend_size, 4 * 3 -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_ARM_CPU_SUSPEND ENTRY(cpu_sa1100_do_suspend) stmfd sp!, {r4 - r6, lr} mrc p15, 0, r4, c3, c0, 0 @ domain ID diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S index bcaaa8de9325..5c07ee4fe3eb 100644 --- a/arch/arm/mm/proc-v6.S +++ b/arch/arm/mm/proc-v6.S @@ -138,7 +138,7 @@ ENTRY(cpu_v6_set_pte_ext) /* Suspend/resume support: taken from arch/arm/mach-s3c64xx/sleep.S */ .globl cpu_v6_suspend_size .equ cpu_v6_suspend_size, 4 * 6 -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_ARM_CPU_SUSPEND ENTRY(cpu_v6_do_suspend) stmfd sp!, {r4 - r9, lr} mrc p15, 0, r4, c13, c0, 0 @ FCSE/PID diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S index eb93d6487f35..e8efd83b6f25 100644 --- a/arch/arm/mm/proc-xsc3.S +++ b/arch/arm/mm/proc-xsc3.S @@ -413,7 +413,7 @@ ENTRY(cpu_xsc3_set_pte_ext) .globl cpu_xsc3_suspend_size .equ cpu_xsc3_suspend_size, 4 * 6 -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_ARM_CPU_SUSPEND ENTRY(cpu_xsc3_do_suspend) stmfd sp!, {r4 - r9, lr} mrc p14, 0, r4, c6, c0, 0 @ clock configuration, for turbo mode diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S index 25510361aa18..e766f889bfd6 100644 --- a/arch/arm/mm/proc-xscale.S +++ b/arch/arm/mm/proc-xscale.S @@ -528,7 +528,7 @@ ENTRY(cpu_xscale_set_pte_ext) .globl cpu_xscale_suspend_size .equ cpu_xscale_suspend_size, 4 * 6 -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_ARM_CPU_SUSPEND ENTRY(cpu_xscale_do_suspend) stmfd sp!, {r4 - r9, lr} mrc p14, 0, r4, c6, c0, 0 @ clock configuration, for turbo mode From 62a40a15554d6924a58b3e9f8756e0d683dc9c0c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 8 Apr 2013 11:52:34 +0200 Subject: [PATCH 097/303] mac80211: fix LED in idle handling feng xiangjun reports that my commit 382a103b2b528a3085cde4ac56fc69d92a828b72 Author: Johannes Berg Date: Fri Mar 22 22:30:09 2013 +0100 mac80211: fix idle handling sequence broke the wireless status LED. The reason is that we now call ieee80211_idle_off() when the channel context is assigned, and that doesn't recalculate the LED state. Fix this by making that function a wrapper around most of idle recalculation while forcing active. Reported-by: feng xiangjun Tested-by: feng xiangjun Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 58150f877ec3..9ed49ad0380f 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -78,7 +78,7 @@ void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_TXPOWER); } -u32 ieee80211_idle_off(struct ieee80211_local *local) +static u32 __ieee80211_idle_off(struct ieee80211_local *local) { if (!(local->hw.conf.flags & IEEE80211_CONF_IDLE)) return 0; @@ -87,7 +87,7 @@ u32 ieee80211_idle_off(struct ieee80211_local *local) return IEEE80211_CONF_CHANGE_IDLE; } -static u32 ieee80211_idle_on(struct ieee80211_local *local) +static u32 __ieee80211_idle_on(struct ieee80211_local *local) { if (local->hw.conf.flags & IEEE80211_CONF_IDLE) return 0; @@ -98,16 +98,18 @@ static u32 ieee80211_idle_on(struct ieee80211_local *local) return IEEE80211_CONF_CHANGE_IDLE; } -void ieee80211_recalc_idle(struct ieee80211_local *local) +static u32 __ieee80211_recalc_idle(struct ieee80211_local *local, + bool force_active) { bool working = false, scanning, active; unsigned int led_trig_start = 0, led_trig_stop = 0; struct ieee80211_roc_work *roc; - u32 change; lockdep_assert_held(&local->mtx); - active = !list_empty(&local->chanctx_list) || local->monitors; + active = force_active || + !list_empty(&local->chanctx_list) || + local->monitors; if (!local->ops->remain_on_channel) { list_for_each_entry(roc, &local->roc_list, list) { @@ -132,9 +134,18 @@ void ieee80211_recalc_idle(struct ieee80211_local *local) ieee80211_mod_tpt_led_trig(local, led_trig_start, led_trig_stop); if (working || scanning || active) - change = ieee80211_idle_off(local); - else - change = ieee80211_idle_on(local); + return __ieee80211_idle_off(local); + return __ieee80211_idle_on(local); +} + +u32 ieee80211_idle_off(struct ieee80211_local *local) +{ + return __ieee80211_recalc_idle(local, true); +} + +void ieee80211_recalc_idle(struct ieee80211_local *local) +{ + u32 change = __ieee80211_recalc_idle(local, false); if (change) ieee80211_hw_config(local, change); } From fd9b86d37a600488dbd80fe60cca46b822bff1cd Mon Sep 17 00:00:00 2001 From: libin Date: Mon, 8 Apr 2013 14:39:12 +0800 Subject: [PATCH 098/303] sched/debug: Fix sd->*_idx limit range avoiding overflow Commit 201c373e8e ("sched/debug: Limit sd->*_idx range on sysctl") was an incomplete bug fix. This patch fixes sd->*_idx limit range to [0 ~ CPU_LOAD_IDX_MAX-1] avoiding array overflow caused by setting sd->*_idx to CPU_LOAD_IDX_MAX on sysctl. Signed-off-by: Libin Cc: Cc: Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/51626610.2040607@huawei.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 306943f531a3..fa077929e315 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4933,7 +4933,7 @@ static void sd_free_ctl_entry(struct ctl_table **tablep) } static int min_load_idx = 0; -static int max_load_idx = CPU_LOAD_IDX_MAX; +static int max_load_idx = CPU_LOAD_IDX_MAX-1; static void set_table_entry(struct ctl_table *entry, From c97847d2f0eb77c806e650e04d9bbcf79fa05730 Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Mon, 8 Apr 2013 11:48:27 +0800 Subject: [PATCH 099/303] perf: Fix strncpy() use, always make sure it's NUL terminated For NUL terminated string, always make sure that there's '\0' at the end. In our case we need a return value, so still use strncpy() and fix up the tail explicitly. (strlcpy() returns the size, not the pointer) Signed-off-by: Chen Gang Cc: a.p.zijlstra@chello.nl Cc: paulus@samba.org Cc: acme@ghostprotocols.net Link: http://lkml.kernel.org/r/51623E0B.7070101@asianux.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 59412d037eed..7f0d67ea3f48 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4737,7 +4737,8 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) } else { if (arch_vma_name(mmap_event->vma)) { name = strncpy(tmp, arch_vma_name(mmap_event->vma), - sizeof(tmp)); + sizeof(tmp) - 1); + tmp[sizeof(tmp) - 1] = '\0'; goto got_name; } From 67012ab1d2ce871afea4ee55408f233f97d09d07 Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Mon, 8 Apr 2013 12:06:44 +0800 Subject: [PATCH 100/303] perf: Fix strncpy() use, use strlcpy() instead of strncpy() For NUL terminated string we always need to set '\0' at the end. Signed-off-by: Chen Gang Cc: rostedt@goodmis.org Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/51624254.30301@asianux.com Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4f1dade56981..3f5046a4d81b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -132,7 +132,7 @@ static char *default_bootup_tracer; static int __init set_cmdline_ftrace(char *str) { - strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE); + strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE); default_bootup_tracer = bootup_tracer_buf; /* We are using ftrace early, expand it */ ring_buffer_expanded = 1; @@ -162,7 +162,7 @@ static char *trace_boot_options __initdata; static int __init set_trace_boot_options(char *str) { - strncpy(trace_boot_options_buf, str, MAX_TRACER_SIZE); + strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE); trace_boot_options = trace_boot_options_buf; return 0; } From 75761cc15877c155b3849b4e0e0cb3f897faf471 Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Mon, 8 Apr 2013 12:12:39 +0800 Subject: [PATCH 101/303] ftrace: Fix strncpy() use, use strlcpy() instead of strncpy() For NUL terminated string we always need to set '\0' at the end. Signed-off-by: Chen Gang Cc: rostedt@goodmis.org Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/516243B7.9020405@asianux.com Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 6893d5a2bf08..db143742704b 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3441,14 +3441,14 @@ static char ftrace_filter_buf[FTRACE_FILTER_SIZE] __initdata; static int __init set_ftrace_notrace(char *str) { - strncpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE); + strlcpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE); return 1; } __setup("ftrace_notrace=", set_ftrace_notrace); static int __init set_ftrace_filter(char *str) { - strncpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE); + strlcpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE); return 1; } __setup("ftrace_filter=", set_ftrace_filter); From e614b3332a4f3f264a26da28e5a1f4cc3aea3974 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 4 Apr 2013 10:57:48 +0200 Subject: [PATCH 102/303] sched/cputime: Fix accounting on multi-threaded processes Recent commit 6fac4829 ("cputime: Use accessors to read task cputime stats") introduced a bug, where we account many times the cputime of the first thread, instead of cputimes of all the different threads. Signed-off-by: Stanislaw Gruszka Acked-by: Frederic Weisbecker Cc: Oleg Nesterov Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20130404085740.GA2495@redhat.com Signed-off-by: Ingo Molnar --- kernel/sched/cputime.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index ed12cbb135f4..e93cca92f38b 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -310,7 +310,7 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) t = tsk; do { - task_cputime(tsk, &utime, &stime); + task_cputime(t, &utime, &stime); times->utime += utime; times->stime += stime; times->sum_exec_runtime += task_sched_runtime(t); From 05a324b9c50c3edbe0ce48ee3e37b210859ef1ae Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 4 Apr 2013 23:39:38 +0000 Subject: [PATCH 103/303] net: ipv4: reset check_lifetime_work after changing lifetime This will result in calling check_lifetime in nearest opportunity and that function will adjust next time to call check_lifetime correctly. Without this, check_lifetime is called in time computed by previous run, not affecting modified lifetime. Introduced by: commit 5c766d642bcaf "ipv4: introduce address lifetime" Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 96083b7a436b..00386e02e708 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -804,6 +804,8 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg return -EEXIST; ifa = ifa_existing; set_ifa_lifetime(ifa, valid_lft, prefered_lft); + cancel_delayed_work(&check_lifetime_work); + schedule_delayed_work(&check_lifetime_work, 0); rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); } From c988d1e8cbf722e34ee6124b8b89d47fec655b51 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 4 Apr 2013 23:39:39 +0000 Subject: [PATCH 104/303] net: ipv4: fix schedule while atomic bug in check_lifetime() move might_sleep operations out of the rcu_read_lock() section. Also fix iterating over ifa_dev->ifa_list Introduced by: commit 5c766d642bcaf "ipv4: introduce address lifetime" Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 58 +++++++++++++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 16 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 00386e02e708..c6287cd978c2 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -587,13 +587,16 @@ static void check_lifetime(struct work_struct *work) { unsigned long now, next, next_sec, next_sched; struct in_ifaddr *ifa; + struct hlist_node *n; int i; now = jiffies; next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY); - rcu_read_lock(); for (i = 0; i < IN4_ADDR_HSIZE; i++) { + bool change_needed = false; + + rcu_read_lock(); hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) { unsigned long age; @@ -606,16 +609,7 @@ static void check_lifetime(struct work_struct *work) if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME && age >= ifa->ifa_valid_lft) { - struct in_ifaddr **ifap ; - - rtnl_lock(); - for (ifap = &ifa->ifa_dev->ifa_list; - *ifap != NULL; ifap = &ifa->ifa_next) { - if (*ifap == ifa) - inet_del_ifa(ifa->ifa_dev, - ifap, 1); - } - rtnl_unlock(); + change_needed = true; } else if (ifa->ifa_preferred_lft == INFINITY_LIFE_TIME) { continue; @@ -625,10 +619,8 @@ static void check_lifetime(struct work_struct *work) next = ifa->ifa_tstamp + ifa->ifa_valid_lft * HZ; - if (!(ifa->ifa_flags & IFA_F_DEPRECATED)) { - ifa->ifa_flags |= IFA_F_DEPRECATED; - rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0); - } + if (!(ifa->ifa_flags & IFA_F_DEPRECATED)) + change_needed = true; } else if (time_before(ifa->ifa_tstamp + ifa->ifa_preferred_lft * HZ, next)) { @@ -636,8 +628,42 @@ static void check_lifetime(struct work_struct *work) ifa->ifa_preferred_lft * HZ; } } + rcu_read_unlock(); + if (!change_needed) + continue; + rtnl_lock(); + hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) { + unsigned long age; + + if (ifa->ifa_flags & IFA_F_PERMANENT) + continue; + + /* We try to batch several events at once. */ + age = (now - ifa->ifa_tstamp + + ADDRCONF_TIMER_FUZZ_MINUS) / HZ; + + if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME && + age >= ifa->ifa_valid_lft) { + struct in_ifaddr **ifap; + + for (ifap = &ifa->ifa_dev->ifa_list; + *ifap != NULL; ifap = &(*ifap)->ifa_next) { + if (*ifap == ifa) { + inet_del_ifa(ifa->ifa_dev, + ifap, 1); + break; + } + } + } else if (ifa->ifa_preferred_lft != + INFINITY_LIFE_TIME && + age >= ifa->ifa_preferred_lft && + !(ifa->ifa_flags & IFA_F_DEPRECATED)) { + ifa->ifa_flags |= IFA_F_DEPRECATED; + rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0); + } + } + rtnl_unlock(); } - rcu_read_unlock(); next_sec = round_jiffies_up(next); next_sched = next; From fd5c07a8d6a10c7112b19f3b0d428627c62b06ab Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Fri, 5 Apr 2013 11:44:39 +0000 Subject: [PATCH 105/303] hyperv: Fix a kernel warning from netvsc_linkstatus_callback() The warning about local_bh_enable inside IRQ happens when disconnecting a virtual NIC. The reason for the warning is -- netif_tx_disable() is called when the NIC is disconnected. And it's called within irq context. netif_tx_disable() calls local_bh_enable() which displays warning if in irq. The fix is to remove the unnecessary netif_tx_disable & wake_queue() in the netvsc_linkstatus_callback(). Reported-by: Richard Genoud Tested-by: Long Li Tested-by: Richard Genoud Signed-off-by: Haiyang Zhang Reviewed-by: K. Y. Srinivasan Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 5f85205cd12b..8341b62e5521 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -241,13 +241,11 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj, if (status == 1) { netif_carrier_on(net); - netif_wake_queue(net); ndev_ctx = netdev_priv(net); schedule_delayed_work(&ndev_ctx->dwork, 0); schedule_delayed_work(&ndev_ctx->dwork, msecs_to_jiffies(20)); } else { netif_carrier_off(net); - netif_tx_disable(net); } } From f1ea3cd70110d482ef1ce6ef158df113aa366f43 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Fri, 5 Apr 2013 11:44:40 +0000 Subject: [PATCH 106/303] hyperv: Fix RNDIS send_completion code path In some cases, the VM_PKT_COMP message can arrive later than RNDIS completion message, which will free the packet memory. This may cause panic due to access to freed memory in netvsc_send_completion(). This patch fixes this problem by removing rndis_filter_send_request_completion() from the code path. The function was a no-op. Reported-by: Long Li Tested-by: Long Li Signed-off-by: Haiyang Zhang Reviewed-by: K. Y. Srinivasan Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 17 ++++++++++++----- drivers/net/hyperv/rndis_filter.c | 14 +------------- 2 files changed, 13 insertions(+), 18 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 1cd77483da50..f5f0f09e4cc5 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -470,8 +470,10 @@ static void netvsc_send_completion(struct hv_device *device, packet->trans_id; /* Notify the layer above us */ - nvsc_packet->completion.send.send_completion( - nvsc_packet->completion.send.send_completion_ctx); + if (nvsc_packet) + nvsc_packet->completion.send.send_completion( + nvsc_packet->completion.send. + send_completion_ctx); num_outstanding_sends = atomic_dec_return(&net_device->num_outstanding_sends); @@ -498,6 +500,7 @@ int netvsc_send(struct hv_device *device, int ret = 0; struct nvsp_message sendMessage; struct net_device *ndev; + u64 req_id; net_device = get_outbound_net_device(device); if (!net_device) @@ -518,20 +521,24 @@ int netvsc_send(struct hv_device *device, 0xFFFFFFFF; sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0; + if (packet->completion.send.send_completion) + req_id = (u64)packet; + else + req_id = 0; + if (packet->page_buf_cnt) { ret = vmbus_sendpacket_pagebuffer(device->channel, packet->page_buf, packet->page_buf_cnt, &sendMessage, sizeof(struct nvsp_message), - (unsigned long)packet); + req_id); } else { ret = vmbus_sendpacket(device->channel, &sendMessage, sizeof(struct nvsp_message), - (unsigned long)packet, + req_id, VM_PKT_DATA_INBAND, VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); - } if (ret == 0) { diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 2b657d4d63a8..0775f0aefd1e 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -61,9 +61,6 @@ struct rndis_request { static void rndis_filter_send_completion(void *ctx); -static void rndis_filter_send_request_completion(void *ctx); - - static struct rndis_device *get_rndis_device(void) { @@ -241,10 +238,7 @@ static int rndis_filter_send_request(struct rndis_device *dev, packet->page_buf[0].len; } - packet->completion.send.send_completion_ctx = req;/* packet; */ - packet->completion.send.send_completion = - rndis_filter_send_request_completion; - packet->completion.send.send_completion_tid = (unsigned long)dev; + packet->completion.send.send_completion = NULL; ret = netvsc_send(dev->net_dev->dev, packet); return ret; @@ -999,9 +993,3 @@ static void rndis_filter_send_completion(void *ctx) /* Pass it back to the original handler */ filter_pkt->completion(filter_pkt->completion_ctx); } - - -static void rndis_filter_send_request_completion(void *ctx) -{ - /* Noop */ -} From b50b72de2f2feed4adfbd8e18610a393b5a04cc7 Mon Sep 17 00:00:00 2001 From: willy tarreau Date: Sat, 6 Apr 2013 08:47:01 +0000 Subject: [PATCH 107/303] net: mvneta: enable features before registering the driver It seems that the reason why the dev features were ignored was because they were enabled after registeration. Signed-off-by: Willy Tarreau Acked-by: Thomas Petazzoni Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index cd345b8969bc..1e628ce57201 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2771,16 +2771,17 @@ static int mvneta_probe(struct platform_device *pdev) netif_napi_add(dev, &pp->napi, mvneta_poll, pp->weight); + dev->features = NETIF_F_SG | NETIF_F_IP_CSUM; + dev->hw_features |= NETIF_F_SG | NETIF_F_IP_CSUM; + dev->vlan_features |= NETIF_F_SG | NETIF_F_IP_CSUM; + dev->priv_flags |= IFF_UNICAST_FLT; + err = register_netdev(dev); if (err < 0) { dev_err(&pdev->dev, "failed to register\n"); goto err_deinit; } - dev->features = NETIF_F_SG | NETIF_F_IP_CSUM; - dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM; - dev->priv_flags |= IFF_UNICAST_FLT; - netdev_info(dev, "mac: %pM\n", dev->dev_addr); platform_set_drvdata(pdev, pp->dev); From 2930e04d00e113ae24bb2b7c2b58de7b648a62c7 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 26 Mar 2013 17:33:00 -0400 Subject: [PATCH 108/303] tracing: Fix race with update_max_tr_single and changing tracers The commit 34600f0e9 "tracing: Fix race with max_tr and changing tracers" fixed the updating of the main buffers with the race of changing tracers, but left out the fix to the updating of just a per cpu buffer. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4f1dade56981..7ba7fc76f9eb 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -744,8 +744,11 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) return; WARN_ON_ONCE(!irqs_disabled()); - if (WARN_ON_ONCE(!current_trace->allocated_snapshot)) + if (!current_trace->allocated_snapshot) { + /* Only the nop tracer should hit this when disabling */ + WARN_ON_ONCE(current_trace != &nop_trace); return; + } arch_spin_lock(&ftrace_max_lock); From 5000c418840b309251c5887f0b56503aae30f84c Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 26 Mar 2013 17:53:03 +0100 Subject: [PATCH 109/303] ftrace: Consistently restore trace function on sysctl enabling If we reenable ftrace via syctl, we currently set ftrace_trace_function based on the previous simplistic algorithm. This is inconsistent with what update_ftrace_function does. So better call that helper instead. Link: http://lkml.kernel.org/r/5151D26F.1070702@siemens.com Cc: stable@vger.kernel.org Signed-off-by: Jan Kiszka Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 6893d5a2bf08..cc4943c7ce6d 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4555,12 +4555,8 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, ftrace_startup_sysctl(); /* we are starting ftrace again */ - if (ftrace_ops_list != &ftrace_list_end) { - if (ftrace_ops_list->next == &ftrace_list_end) - ftrace_trace_function = ftrace_ops_list->func; - else - ftrace_trace_function = ftrace_ops_list_func; - } + if (ftrace_ops_list != &ftrace_list_end) + update_ftrace_function(); } else { /* stopping ftrace calls (just send to ftrace_stub) */ From 395b97a3aeff0b8d949ee3e67bf8c11c5ffd6861 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 27 Mar 2013 09:31:28 -0400 Subject: [PATCH 110/303] ftrace: Do not call stub functions in control loop The function tracing control loop used by perf spits out a warning if the called function is not a control function. This is because the control function references a per cpu allocated data structure on struct ftrace_ops that is not allocated for other types of functions. commit 0a016409e42 "ftrace: Optimize the function tracer list loop" Had an optimization done to all function tracing loops to optimize for a single registered ops. Unfortunately, this allows for a slight race when tracing starts or ends, where the stub function might be called after the current registered ops is removed. In this case we get the following dump: root# perf stat -e ftrace:function sleep 1 [ 74.339105] WARNING: at include/linux/ftrace.h:209 ftrace_ops_control_func+0xde/0xf0() [ 74.349522] Hardware name: PRIMERGY RX200 S6 [ 74.357149] Modules linked in: sg igb iTCO_wdt ptp pps_core iTCO_vendor_support i7core_edac dca lpc_ich i2c_i801 coretemp edac_core crc32c_intel mfd_core ghash_clmulni_intel dm_multipath acpi_power_meter pcspk r microcode vhost_net tun macvtap macvlan nfsd kvm_intel kvm auth_rpcgss nfs_acl lockd sunrpc uinput xfs libcrc32c sd_mod crc_t10dif sr_mod cdrom mgag200 i2c_algo_bit drm_kms_helper ttm qla2xxx mptsas ahci drm li bahci scsi_transport_sas mptscsih libata scsi_transport_fc i2c_core mptbase scsi_tgt dm_mirror dm_region_hash dm_log dm_mod [ 74.446233] Pid: 1377, comm: perf Tainted: G W 3.9.0-rc1 #1 [ 74.453458] Call Trace: [ 74.456233] [] warn_slowpath_common+0x7f/0xc0 [ 74.462997] [] ? rcu_note_context_switch+0xa0/0xa0 [ 74.470272] [] ? __unregister_ftrace_function+0xa2/0x1a0 [ 74.478117] [] warn_slowpath_null+0x1a/0x20 [ 74.484681] [] ftrace_ops_control_func+0xde/0xf0 [ 74.491760] [] ftrace_call+0x5/0x2f [ 74.497511] [] ? ftrace_call+0x5/0x2f [ 74.503486] [] ? ftrace_call+0x5/0x2f [ 74.509500] [] ? synchronize_sched+0x5/0x50 [ 74.516088] [] ? _cond_resched+0x5/0x40 [ 74.522268] [] ? synchronize_sched+0x5/0x50 [ 74.528837] [] ? __unregister_ftrace_function+0xa2/0x1a0 [ 74.536696] [] ? _cond_resched+0x5/0x40 [ 74.542878] [] ? mutex_lock+0x1d/0x50 [ 74.548869] [] unregister_ftrace_function+0x27/0x50 [ 74.556243] [] perf_ftrace_event_register+0x9f/0x140 [ 74.563709] [] ? _cond_resched+0x5/0x40 [ 74.569887] [] ? mutex_lock+0x1d/0x50 [ 74.575898] [] perf_trace_destroy+0x2e/0x50 [ 74.582505] [] tp_perf_event_destroy+0x9/0x10 [ 74.589298] [] free_event+0x70/0x1a0 [ 74.595208] [] perf_event_release_kernel+0x69/0xa0 [ 74.602460] [] ? _cond_resched+0x5/0x40 [ 74.608667] [] put_event+0x90/0xc0 [ 74.614373] [] perf_release+0x10/0x20 [ 74.620367] [] __fput+0xf4/0x280 [ 74.625894] [] ____fput+0xe/0x10 [ 74.631387] [] task_work_run+0xa7/0xe0 [ 74.637452] [] do_notify_resume+0x71/0xb0 [ 74.643843] [] int_signal+0x12/0x17 To fix this a new ftrace_ops flag is added that denotes the ftrace_list_end ftrace_ops stub as just that, a stub. This flag is now checked in the control loop and the function is not called if the flag is set. Thanks to Jovi for not just reporting the bug, but also pointing out where the bug was in the code. Link: http://lkml.kernel.org/r/514A8855.7090402@redhat.com Link: http://lkml.kernel.org/r/1364377499-1900-15-git-send-email-jovi.zhangwei@huawei.com Tested-by: WANG Chao Reported-by: WANG Chao Reported-by: zhangwei(Jovi) Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 2 ++ kernel/trace/ftrace.c | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index e5ca8ef50e9b..167abf907802 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -89,6 +89,7 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, * that the call back has its own recursion protection. If it does * not set this, then the ftrace infrastructure will add recursion * protection for the caller. + * STUB - The ftrace_ops is just a place holder. */ enum { FTRACE_OPS_FL_ENABLED = 1 << 0, @@ -98,6 +99,7 @@ enum { FTRACE_OPS_FL_SAVE_REGS = 1 << 4, FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED = 1 << 5, FTRACE_OPS_FL_RECURSION_SAFE = 1 << 6, + FTRACE_OPS_FL_STUB = 1 << 7, }; struct ftrace_ops { diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index cc4943c7ce6d..7e897106b7e0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -66,7 +66,7 @@ static struct ftrace_ops ftrace_list_end __read_mostly = { .func = ftrace_stub, - .flags = FTRACE_OPS_FL_RECURSION_SAFE, + .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_STUB, }; /* ftrace_enabled is a method to turn ftrace on or off */ @@ -4131,7 +4131,8 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip, preempt_disable_notrace(); trace_recursion_set(TRACE_CONTROL_BIT); do_for_each_ftrace_op(op, ftrace_control_list) { - if (!ftrace_function_local_disabled(op) && + if (!(op->flags & FTRACE_OPS_FL_STUB) && + !ftrace_function_local_disabled(op) && ftrace_ops_test(op, ip)) op->func(ip, parent_ip, op, regs); } while_for_each_ftrace_op(op); From c12f9ea28e9fbe7b1f4cefc953bee33298370683 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Sun, 7 Apr 2013 20:47:18 +0100 Subject: [PATCH 111/303] Revert "tty/8250_pnp: serial port detection regression since v3.7" This reverts commit 77e372a3d82e5e4878ce1962207edd766773cc76. Checking for disabled resources board breaks detection pnp on another board "AMI UEFI implementation (Version: 0406 Release Date: 06/06/2012)". I'm working with the reporter of the original bug to write and test a better fix. https://bugzilla.redhat.com/show_bug.cgi?id=928246 Signed-off-by: Sean Young Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pnp.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/tty/serial/8250/8250_pnp.c b/drivers/tty/serial/8250/8250_pnp.c index b3455a970a1d..35d9ab95c5cb 100644 --- a/drivers/tty/serial/8250/8250_pnp.c +++ b/drivers/tty/serial/8250/8250_pnp.c @@ -429,7 +429,6 @@ serial_pnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id) { struct uart_8250_port uart; int ret, line, flags = dev_id->driver_data; - struct resource *res = NULL; if (flags & UNKNOWN_DEV) { ret = serial_pnp_guess_board(dev); @@ -440,12 +439,11 @@ serial_pnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id) memset(&uart, 0, sizeof(uart)); if (pnp_irq_valid(dev, 0)) uart.port.irq = pnp_irq(dev, 0); - if ((flags & CIR_PORT) && pnp_port_valid(dev, 2)) - res = pnp_get_resource(dev, IORESOURCE_IO, 2); - else if (pnp_port_valid(dev, 0)) - res = pnp_get_resource(dev, IORESOURCE_IO, 0); - if (pnp_resource_enabled(res)) { - uart.port.iobase = res->start; + if ((flags & CIR_PORT) && pnp_port_valid(dev, 2)) { + uart.port.iobase = pnp_port_start(dev, 2); + uart.port.iotype = UPIO_PORT; + } else if (pnp_port_valid(dev, 0)) { + uart.port.iobase = pnp_port_start(dev, 0); uart.port.iotype = UPIO_PORT; } else if (pnp_mem_valid(dev, 0)) { uart.port.mapbase = pnp_mem_start(dev, 0); From 1b581f173992cded311f810346378f763a1f1e5d Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sun, 7 Apr 2013 23:46:47 +0400 Subject: [PATCH 112/303] tty: mxser: fix cycle termination condition in mxser_probe() and mxser_module_init() There is a bug in resources deallocation code in mxser_probe() and mxser_module_init(). As soon as variable 'i' is unsigned int, cycle termination condition i >= 0 is always true. The patch fixes the issue. Signed-off-by: Alexey Khoroshilov Acked-by: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- drivers/tty/mxser.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/tty/mxser.c b/drivers/tty/mxser.c index 484b6a3c9b03..302909ccf183 100644 --- a/drivers/tty/mxser.c +++ b/drivers/tty/mxser.c @@ -2643,9 +2643,9 @@ static int mxser_probe(struct pci_dev *pdev, mxvar_sdriver, brd->idx + i, &pdev->dev); if (IS_ERR(tty_dev)) { retval = PTR_ERR(tty_dev); - for (i--; i >= 0; i--) + for (; i > 0; i--) tty_unregister_device(mxvar_sdriver, - brd->idx + i); + brd->idx + i - 1); goto err_relbrd; } } @@ -2751,9 +2751,9 @@ static int __init mxser_module_init(void) tty_dev = tty_port_register_device(&brd->ports[i].port, mxvar_sdriver, brd->idx + i, NULL); if (IS_ERR(tty_dev)) { - for (i--; i >= 0; i--) + for (; i > 0; i--) tty_unregister_device(mxvar_sdriver, - brd->idx + i); + brd->idx + i - 1); for (i = 0; i < brd->info->nports; i++) tty_port_destroy(&brd->ports[i].port); free_irq(brd->irq, brd); From 5d8cd3b16ec7b56d7502ff1ae21ae6596d2f96e9 Mon Sep 17 00:00:00 2001 From: Zefir Kurtisi Date: Tue, 2 Apr 2013 12:51:48 +0200 Subject: [PATCH 113/303] ath9k: fix DFS detector called in softirq context Signed-off-by: Zefir Kurtisi Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/dfs_pattern_detector.c | 4 ++-- drivers/net/wireless/ath/ath9k/dfs_pri_detector.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/dfs_pattern_detector.c b/drivers/net/wireless/ath/ath9k/dfs_pattern_detector.c index 467b60014b7b..73fe8d6db566 100644 --- a/drivers/net/wireless/ath/ath9k/dfs_pattern_detector.c +++ b/drivers/net/wireless/ath/ath9k/dfs_pattern_detector.c @@ -143,14 +143,14 @@ channel_detector_create(struct dfs_pattern_detector *dpd, u16 freq) u32 sz, i; struct channel_detector *cd; - cd = kmalloc(sizeof(*cd), GFP_KERNEL); + cd = kmalloc(sizeof(*cd), GFP_ATOMIC); if (cd == NULL) goto fail; INIT_LIST_HEAD(&cd->head); cd->freq = freq; sz = sizeof(cd->detectors) * dpd->num_radar_types; - cd->detectors = kzalloc(sz, GFP_KERNEL); + cd->detectors = kzalloc(sz, GFP_ATOMIC); if (cd->detectors == NULL) goto fail; diff --git a/drivers/net/wireless/ath/ath9k/dfs_pri_detector.c b/drivers/net/wireless/ath/ath9k/dfs_pri_detector.c index 91b8dceeadb1..5e48c5515b8c 100644 --- a/drivers/net/wireless/ath/ath9k/dfs_pri_detector.c +++ b/drivers/net/wireless/ath/ath9k/dfs_pri_detector.c @@ -218,7 +218,7 @@ static bool pulse_queue_enqueue(struct pri_detector *pde, u64 ts) { struct pulse_elem *p = pool_get_pulse_elem(); if (p == NULL) { - p = kmalloc(sizeof(*p), GFP_KERNEL); + p = kmalloc(sizeof(*p), GFP_ATOMIC); if (p == NULL) { DFS_POOL_STAT_INC(pulse_alloc_error); return false; @@ -299,7 +299,7 @@ static bool pseq_handler_create_sequences(struct pri_detector *pde, ps.deadline_ts = ps.first_ts + ps.dur; new_ps = pool_get_pseq_elem(); if (new_ps == NULL) { - new_ps = kmalloc(sizeof(*new_ps), GFP_KERNEL); + new_ps = kmalloc(sizeof(*new_ps), GFP_ATOMIC); if (new_ps == NULL) { DFS_POOL_STAT_INC(pseq_alloc_error); return false; From 319e7bd96aca64a478f3aad40711c928405b8b77 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 7 Apr 2013 21:10:48 +0200 Subject: [PATCH 114/303] ath9k_htc: accept 1.x firmware newer than 1.3 Since the firmware has been open sourced, the minor version has been bumped to 1.4 and the API/ABI will stay compatible across further 1.x releases. Signed-off-by: Felix Fietkau Cc: stable@vger.kernel.org Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/htc_drv_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_init.c b/drivers/net/wireless/ath/ath9k/htc_drv_init.c index 716058b67557..a47f5e05fc04 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_init.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_init.c @@ -796,7 +796,7 @@ static int ath9k_init_firmware_version(struct ath9k_htc_priv *priv) * required version. */ if (priv->fw_version_major != MAJOR_VERSION_REQ || - priv->fw_version_minor != MINOR_VERSION_REQ) { + priv->fw_version_minor < MINOR_VERSION_REQ) { dev_err(priv->dev, "ath9k_htc: Please upgrade to FW version %d.%d\n", MAJOR_VERSION_REQ, MINOR_VERSION_REQ); return -EINVAL; From 1527c343c12f3a2aae532aa881d12c6fbf8749f4 Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Thu, 4 Apr 2013 12:10:11 +0200 Subject: [PATCH 115/303] brcmfmac: remove advertising P2P device support For v3.9 kernel the P2P functionality was merged, but it does not fully support the P2P_DEVICE interface type. This patch removes advertising that support. Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Signed-off-by: Arend van Spriel Signed-off-by: John W. Linville --- drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c b/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c index ec46ffff5409..78da3eff75e8 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c @@ -4126,10 +4126,6 @@ static const struct ieee80211_iface_limit brcmf_iface_limits[] = { BIT(NL80211_IFTYPE_ADHOC) | BIT(NL80211_IFTYPE_AP) }, - { - .max = 1, - .types = BIT(NL80211_IFTYPE_P2P_DEVICE) - }, { .max = 1, .types = BIT(NL80211_IFTYPE_P2P_CLIENT) | @@ -4187,8 +4183,7 @@ static struct wiphy *brcmf_setup_wiphy(struct device *phydev) BIT(NL80211_IFTYPE_ADHOC) | BIT(NL80211_IFTYPE_AP) | BIT(NL80211_IFTYPE_P2P_CLIENT) | - BIT(NL80211_IFTYPE_P2P_GO) | - BIT(NL80211_IFTYPE_P2P_DEVICE); + BIT(NL80211_IFTYPE_P2P_GO); wiphy->iface_combinations = brcmf_iface_combos; wiphy->n_iface_combinations = ARRAY_SIZE(brcmf_iface_combos); wiphy->bands[IEEE80211_BAND_2GHZ] = &__wl_band_2ghz; From 25b5632fb35ca61b8ae3eee235edcdc2883f7a5e Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Thu, 4 Apr 2013 12:10:10 +0200 Subject: [PATCH 116/303] brcmsmac: request firmware in .start() callback The firmware is requested from user-space. To assure the request is handled it is recommended to do the request upon IFF_UP. For a mac80211 driver the .start() callback can be considered the equivalent. Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Piotr Haber Reviewed-by: Hante Meuleman Signed-off-by: Arend van Spriel Signed-off-by: John W. Linville --- .../wireless/brcm80211/brcmsmac/mac80211_if.c | 264 +++++++++--------- 1 file changed, 132 insertions(+), 132 deletions(-) diff --git a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c index c6451c61407a..e2340b231aa1 100644 --- a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c +++ b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c @@ -274,6 +274,130 @@ static void brcms_set_basic_rate(struct brcm_rateset *rs, u16 rate, bool is_br) } } +/** + * This function frees the WL per-device resources. + * + * This function frees resources owned by the WL device pointed to + * by the wl parameter. + * + * precondition: can both be called locked and unlocked + * + */ +static void brcms_free(struct brcms_info *wl) +{ + struct brcms_timer *t, *next; + + /* free ucode data */ + if (wl->fw.fw_cnt) + brcms_ucode_data_free(&wl->ucode); + if (wl->irq) + free_irq(wl->irq, wl); + + /* kill dpc */ + tasklet_kill(&wl->tasklet); + + if (wl->pub) { + brcms_debugfs_detach(wl->pub); + brcms_c_module_unregister(wl->pub, "linux", wl); + } + + /* free common resources */ + if (wl->wlc) { + brcms_c_detach(wl->wlc); + wl->wlc = NULL; + wl->pub = NULL; + } + + /* virtual interface deletion is deferred so we cannot spinwait */ + + /* wait for all pending callbacks to complete */ + while (atomic_read(&wl->callbacks) > 0) + schedule(); + + /* free timers */ + for (t = wl->timers; t; t = next) { + next = t->next; +#ifdef DEBUG + kfree(t->name); +#endif + kfree(t); + } +} + +/* +* called from both kernel as from this kernel module (error flow on attach) +* precondition: perimeter lock is not acquired. +*/ +static void brcms_remove(struct bcma_device *pdev) +{ + struct ieee80211_hw *hw = bcma_get_drvdata(pdev); + struct brcms_info *wl = hw->priv; + + if (wl->wlc) { + wiphy_rfkill_set_hw_state(wl->pub->ieee_hw->wiphy, false); + wiphy_rfkill_stop_polling(wl->pub->ieee_hw->wiphy); + ieee80211_unregister_hw(hw); + } + + brcms_free(wl); + + bcma_set_drvdata(pdev, NULL); + ieee80211_free_hw(hw); +} + +/* + * Precondition: Since this function is called in brcms_pci_probe() context, + * no locking is required. + */ +static void brcms_release_fw(struct brcms_info *wl) +{ + int i; + for (i = 0; i < MAX_FW_IMAGES; i++) { + release_firmware(wl->fw.fw_bin[i]); + release_firmware(wl->fw.fw_hdr[i]); + } +} + +/* + * Precondition: Since this function is called in brcms_pci_probe() context, + * no locking is required. + */ +static int brcms_request_fw(struct brcms_info *wl, struct bcma_device *pdev) +{ + int status; + struct device *device = &pdev->dev; + char fw_name[100]; + int i; + + memset(&wl->fw, 0, sizeof(struct brcms_firmware)); + for (i = 0; i < MAX_FW_IMAGES; i++) { + if (brcms_firmwares[i] == NULL) + break; + sprintf(fw_name, "%s-%d.fw", brcms_firmwares[i], + UCODE_LOADER_API_VER); + status = request_firmware(&wl->fw.fw_bin[i], fw_name, device); + if (status) { + wiphy_err(wl->wiphy, "%s: fail to load firmware %s\n", + KBUILD_MODNAME, fw_name); + return status; + } + sprintf(fw_name, "%s_hdr-%d.fw", brcms_firmwares[i], + UCODE_LOADER_API_VER); + status = request_firmware(&wl->fw.fw_hdr[i], fw_name, device); + if (status) { + wiphy_err(wl->wiphy, "%s: fail to load firmware %s\n", + KBUILD_MODNAME, fw_name); + return status; + } + wl->fw.hdr_num_entries[i] = + wl->fw.fw_hdr[i]->size / (sizeof(struct firmware_hdr)); + } + wl->fw.fw_cnt = i; + status = brcms_ucode_data_init(wl, &wl->ucode); + brcms_release_fw(wl); + return status; +} + static void brcms_ops_tx(struct ieee80211_hw *hw, struct ieee80211_tx_control *control, struct sk_buff *skb) @@ -306,6 +430,14 @@ static int brcms_ops_start(struct ieee80211_hw *hw) if (!blocked) wiphy_rfkill_stop_polling(wl->pub->ieee_hw->wiphy); + if (!wl->ucode.bcm43xx_bomminor) { + err = brcms_request_fw(wl, wl->wlc->hw->d11core); + if (err) { + brcms_remove(wl->wlc->hw->d11core); + return -ENOENT; + } + } + spin_lock_bh(&wl->lock); /* avoid acknowledging frames before a non-monitor device is added */ wl->mute_tx = true; @@ -793,128 +925,6 @@ void brcms_dpc(unsigned long data) wake_up(&wl->tx_flush_wq); } -/* - * Precondition: Since this function is called in brcms_pci_probe() context, - * no locking is required. - */ -static int brcms_request_fw(struct brcms_info *wl, struct bcma_device *pdev) -{ - int status; - struct device *device = &pdev->dev; - char fw_name[100]; - int i; - - memset(&wl->fw, 0, sizeof(struct brcms_firmware)); - for (i = 0; i < MAX_FW_IMAGES; i++) { - if (brcms_firmwares[i] == NULL) - break; - sprintf(fw_name, "%s-%d.fw", brcms_firmwares[i], - UCODE_LOADER_API_VER); - status = request_firmware(&wl->fw.fw_bin[i], fw_name, device); - if (status) { - wiphy_err(wl->wiphy, "%s: fail to load firmware %s\n", - KBUILD_MODNAME, fw_name); - return status; - } - sprintf(fw_name, "%s_hdr-%d.fw", brcms_firmwares[i], - UCODE_LOADER_API_VER); - status = request_firmware(&wl->fw.fw_hdr[i], fw_name, device); - if (status) { - wiphy_err(wl->wiphy, "%s: fail to load firmware %s\n", - KBUILD_MODNAME, fw_name); - return status; - } - wl->fw.hdr_num_entries[i] = - wl->fw.fw_hdr[i]->size / (sizeof(struct firmware_hdr)); - } - wl->fw.fw_cnt = i; - return brcms_ucode_data_init(wl, &wl->ucode); -} - -/* - * Precondition: Since this function is called in brcms_pci_probe() context, - * no locking is required. - */ -static void brcms_release_fw(struct brcms_info *wl) -{ - int i; - for (i = 0; i < MAX_FW_IMAGES; i++) { - release_firmware(wl->fw.fw_bin[i]); - release_firmware(wl->fw.fw_hdr[i]); - } -} - -/** - * This function frees the WL per-device resources. - * - * This function frees resources owned by the WL device pointed to - * by the wl parameter. - * - * precondition: can both be called locked and unlocked - * - */ -static void brcms_free(struct brcms_info *wl) -{ - struct brcms_timer *t, *next; - - /* free ucode data */ - if (wl->fw.fw_cnt) - brcms_ucode_data_free(&wl->ucode); - if (wl->irq) - free_irq(wl->irq, wl); - - /* kill dpc */ - tasklet_kill(&wl->tasklet); - - if (wl->pub) { - brcms_debugfs_detach(wl->pub); - brcms_c_module_unregister(wl->pub, "linux", wl); - } - - /* free common resources */ - if (wl->wlc) { - brcms_c_detach(wl->wlc); - wl->wlc = NULL; - wl->pub = NULL; - } - - /* virtual interface deletion is deferred so we cannot spinwait */ - - /* wait for all pending callbacks to complete */ - while (atomic_read(&wl->callbacks) > 0) - schedule(); - - /* free timers */ - for (t = wl->timers; t; t = next) { - next = t->next; -#ifdef DEBUG - kfree(t->name); -#endif - kfree(t); - } -} - -/* -* called from both kernel as from this kernel module (error flow on attach) -* precondition: perimeter lock is not acquired. -*/ -static void brcms_remove(struct bcma_device *pdev) -{ - struct ieee80211_hw *hw = bcma_get_drvdata(pdev); - struct brcms_info *wl = hw->priv; - - if (wl->wlc) { - wiphy_rfkill_set_hw_state(wl->pub->ieee_hw->wiphy, false); - wiphy_rfkill_stop_polling(wl->pub->ieee_hw->wiphy); - ieee80211_unregister_hw(hw); - } - - brcms_free(wl); - - bcma_set_drvdata(pdev, NULL); - ieee80211_free_hw(hw); -} - static irqreturn_t brcms_isr(int irq, void *dev_id) { struct brcms_info *wl; @@ -1047,18 +1057,8 @@ static struct brcms_info *brcms_attach(struct bcma_device *pdev) spin_lock_init(&wl->lock); spin_lock_init(&wl->isr_lock); - /* prepare ucode */ - if (brcms_request_fw(wl, pdev) < 0) { - wiphy_err(wl->wiphy, "%s: Failed to find firmware usually in " - "%s\n", KBUILD_MODNAME, "/lib/firmware/brcm"); - brcms_release_fw(wl); - brcms_remove(pdev); - return NULL; - } - /* common load-time initialization */ wl->wlc = brcms_c_attach((void *)wl, pdev, unit, false, &err); - brcms_release_fw(wl); if (!wl->wlc) { wiphy_err(wl->wiphy, "%s: attach() failed with code %d\n", KBUILD_MODNAME, err); From ec376a2ab97ec3be52ca282dc6ac102e805d1005 Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Thu, 4 Apr 2013 10:35:35 -0700 Subject: [PATCH 117/303] cpufreq / intel_pstate: Set timer timeout correctly The current calculation of the delay time is wrong and a cut and paste error from a previous experimental driver. This can result in the timeout being set to jiffies + 1 which setup the driver to race with itself if the APIC timer interrupt happens at just the right time. References: https://bugzilla.redhat.com/show_bug.cgi?id=920289 Reported-by: Adam Williamson Reported-and-tested-by: Parag Warudkar Signed-off-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index ad72922919ed..6133ef5cf671 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -502,7 +502,6 @@ static inline void intel_pstate_set_sample_time(struct cpudata *cpu) sample_time = cpu->pstate_policy->sample_rate_ms; delay = msecs_to_jiffies(sample_time); - delay -= jiffies % delay; mod_timer_pinned(&cpu->timer, jiffies + delay); } From 6f389a8f1dd22a24f3d9afc2812b30d639e94625 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sun, 7 Apr 2013 02:14:14 +0000 Subject: [PATCH 118/303] PM / reboot: call syscore_shutdown() after disable_nonboot_cpus() As commit 40dc166c (PM / Core: Introduce struct syscore_ops for core subsystems PM) say, syscore_ops operations should be carried with one CPU on-line and interrupts disabled. However, after commit f96972f2d (kernel/sys.c: call disable_nonboot_cpus() in kernel_restart()), syscore_shutdown() is called before disable_nonboot_cpus(), so break the rules. We have a MIPS machine with a 8259A PIC, and there is an external timer (HPET) linked at 8259A. Since 8259A has been shutdown too early (by syscore_shutdown()), disable_nonboot_cpus() runs without timer interrupt, so it hangs and reboot fails. This patch call syscore_shutdown() a little later (after disable_nonboot_cpus()) to avoid reboot failure, this is the same way as poweroff does. For consistency, add disable_nonboot_cpus() to kernel_halt(). Signed-off-by: Huacai Chen Cc: Signed-off-by: Rafael J. Wysocki --- kernel/sys.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/sys.c b/kernel/sys.c index 39c9c4a2949f..0da73cf73e60 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -324,7 +324,6 @@ void kernel_restart_prepare(char *cmd) system_state = SYSTEM_RESTART; usermodehelper_disable(); device_shutdown(); - syscore_shutdown(); } /** @@ -370,6 +369,7 @@ void kernel_restart(char *cmd) { kernel_restart_prepare(cmd); disable_nonboot_cpus(); + syscore_shutdown(); if (!cmd) printk(KERN_EMERG "Restarting system.\n"); else @@ -395,6 +395,7 @@ static void kernel_shutdown_prepare(enum system_states state) void kernel_halt(void) { kernel_shutdown_prepare(SYSTEM_HALT); + disable_nonboot_cpus(); syscore_shutdown(); printk(KERN_EMERG "System halted.\n"); kmsg_dump(KMSG_DUMP_HALT); From ffcdedb667b6db8ee31c7efa76a3ec59d9c3b0fc Mon Sep 17 00:00:00 2001 From: "nikolay@redhat.com" Date: Sat, 6 Apr 2013 00:54:37 +0000 Subject: [PATCH 119/303] Revert "bonding: remove sysfs before removing devices" This reverts commit 4de79c737b200492195ebc54a887075327e1ec1d. This patch introduces a new bug which causes access to freed memory. In bond_uninit: list_del(&bond->bond_list); bond_list is linked in bond_net's dev_list which is freed by unregister_pernet_subsys. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 171b10f167a5..a51241b2e621 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4902,8 +4902,8 @@ static void __exit bonding_exit(void) bond_destroy_debugfs(); - unregister_pernet_subsys(&bond_net_ops); rtnl_link_unregister(&bond_link_ops); + unregister_pernet_subsys(&bond_net_ops); #ifdef CONFIG_NET_POLL_CONTROLLER /* From 69b0216ac255f523556fa3d4ff030d857eaaa37f Mon Sep 17 00:00:00 2001 From: "nikolay@redhat.com" Date: Sat, 6 Apr 2013 00:54:38 +0000 Subject: [PATCH 120/303] bonding: fix bonding_masters race condition in bond unloading While the bonding module is unloading, it is considered that after rtnl_link_unregister all bond devices are destroyed but since no synchronization mechanism exists, a new bond device can be created via bonding_masters before unregister_pernet_subsys which would lead to multiple problems (e.g. NULL pointer dereference, wrong RIP, list corruption). This patch fixes the issue by removing any bond devices left in the netns after bonding_masters is removed from sysfs. Signed-off-by: Nikolay Aleksandrov Acked-by: Veaceslav Falico Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index a51241b2e621..07401a3e256b 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4846,9 +4846,18 @@ static int __net_init bond_net_init(struct net *net) static void __net_exit bond_net_exit(struct net *net) { struct bond_net *bn = net_generic(net, bond_net_id); + struct bonding *bond, *tmp_bond; + LIST_HEAD(list); bond_destroy_sysfs(bn); bond_destroy_proc_dir(bn); + + /* Kill off any bonds created after unregistering bond rtnl ops */ + rtnl_lock(); + list_for_each_entry_safe(bond, tmp_bond, &bn->dev_list, bond_list) + unregister_netdevice_queue(bond->dev, &list); + unregister_netdevice_many(&list); + rtnl_unlock(); } static struct pernet_operations bond_net_ops = { From 4f7f46d32c9875004fae1d57ae3c02cc2e6cd6a3 Mon Sep 17 00:00:00 2001 From: Asias He Date: Wed, 3 Apr 2013 14:17:37 +0800 Subject: [PATCH 121/303] tcm_vhost: Use vq->private_data to indicate if the endpoint is setup Currently, vs->vs_endpoint is used indicate if the endpoint is setup or not. It is set or cleared in vhost_scsi_set_endpoint() or vhost_scsi_clear_endpoint() under the vs->dev.mutex lock. However, when we check it in vhost_scsi_handle_vq(), we ignored the lock. Instead of using the vs->vs_endpoint and the vs->dev.mutex lock to indicate the status of the endpoint, we use per virtqueue vq->private_data to indicate it. In this way, we can only take the vq->mutex lock which is per queue and make the concurrent multiqueue process having less lock contention. Further, in the read side of vq->private_data, we can even do not take the lock if it is accessed in the vhost worker thread, because it is protected by "vhost rcu". (nab: Do s/VHOST_FEATURES/~VHOST_SCSI_FEATURES) Signed-off-by: Asias He Acked-by: Michael S. Tsirkin Signed-off-by: Nicholas Bellinger --- drivers/vhost/tcm_vhost.c | 144 ++++++++++++++++++++++++++------------ 1 file changed, 101 insertions(+), 43 deletions(-) diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c index dd9614eb2577..6cda137bb208 100644 --- a/drivers/vhost/tcm_vhost.c +++ b/drivers/vhost/tcm_vhost.c @@ -74,9 +74,8 @@ enum { struct vhost_scsi { /* Protected by vhost_scsi->dev.mutex */ - struct tcm_vhost_tpg *vs_tpg[VHOST_SCSI_MAX_TARGET]; + struct tcm_vhost_tpg **vs_tpg; char vs_vhost_wwpn[TRANSPORT_IQN_LEN]; - bool vs_endpoint; struct vhost_dev dev; struct vhost_virtqueue vqs[VHOST_SCSI_MAX_VQ]; @@ -582,6 +581,7 @@ static void tcm_vhost_submission_work(struct work_struct *work) static void vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) { + struct tcm_vhost_tpg **vs_tpg; struct virtio_scsi_cmd_req v_req; struct tcm_vhost_tpg *tv_tpg; struct tcm_vhost_cmd *tv_cmd; @@ -590,8 +590,16 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, int head, ret; u8 target; - /* Must use ioctl VHOST_SCSI_SET_ENDPOINT */ - if (unlikely(!vs->vs_endpoint)) + /* + * We can handle the vq only after the endpoint is setup by calling the + * VHOST_SCSI_SET_ENDPOINT ioctl. + * + * TODO: Check that we are running from vhost_worker which acts + * as read-side critical section for vhost kind of RCU. + * See the comments in struct vhost_virtqueue in drivers/vhost/vhost.h + */ + vs_tpg = rcu_dereference_check(vq->private_data, 1); + if (!vs_tpg) return; mutex_lock(&vq->mutex); @@ -661,7 +669,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, /* Extract the tpgt */ target = v_req.lun[1]; - tv_tpg = ACCESS_ONCE(vs->vs_tpg[target]); + tv_tpg = ACCESS_ONCE(vs_tpg[target]); /* Target does not exist, fail the request */ if (unlikely(!tv_tpg)) { @@ -780,6 +788,20 @@ static void vhost_scsi_handle_kick(struct vhost_work *work) vhost_scsi_handle_vq(vs, vq); } +static void vhost_scsi_flush_vq(struct vhost_scsi *vs, int index) +{ + vhost_poll_flush(&vs->dev.vqs[index].poll); +} + +static void vhost_scsi_flush(struct vhost_scsi *vs) +{ + int i; + + for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) + vhost_scsi_flush_vq(vs, i); + vhost_work_flush(&vs->dev, &vs->vs_completion_work); +} + /* * Called from vhost_scsi_ioctl() context to walk the list of available * tcm_vhost_tpg with an active struct tcm_vhost_nexus @@ -790,8 +812,10 @@ static int vhost_scsi_set_endpoint( { struct tcm_vhost_tport *tv_tport; struct tcm_vhost_tpg *tv_tpg; + struct tcm_vhost_tpg **vs_tpg; + struct vhost_virtqueue *vq; + int index, ret, i, len; bool match = false; - int index, ret; mutex_lock(&vs->dev.mutex); /* Verify that ring has been setup correctly. */ @@ -803,6 +827,15 @@ static int vhost_scsi_set_endpoint( } } + len = sizeof(vs_tpg[0]) * VHOST_SCSI_MAX_TARGET; + vs_tpg = kzalloc(len, GFP_KERNEL); + if (!vs_tpg) { + mutex_unlock(&vs->dev.mutex); + return -ENOMEM; + } + if (vs->vs_tpg) + memcpy(vs_tpg, vs->vs_tpg, len); + mutex_lock(&tcm_vhost_mutex); list_for_each_entry(tv_tpg, &tcm_vhost_list, tv_tpg_list) { mutex_lock(&tv_tpg->tv_tpg_mutex); @@ -817,14 +850,15 @@ static int vhost_scsi_set_endpoint( tv_tport = tv_tpg->tport; if (!strcmp(tv_tport->tport_name, t->vhost_wwpn)) { - if (vs->vs_tpg[tv_tpg->tport_tpgt]) { + if (vs->vs_tpg && vs->vs_tpg[tv_tpg->tport_tpgt]) { mutex_unlock(&tv_tpg->tv_tpg_mutex); mutex_unlock(&tcm_vhost_mutex); mutex_unlock(&vs->dev.mutex); + kfree(vs_tpg); return -EEXIST; } tv_tpg->tv_tpg_vhost_count++; - vs->vs_tpg[tv_tpg->tport_tpgt] = tv_tpg; + vs_tpg[tv_tpg->tport_tpgt] = tv_tpg; smp_mb__after_atomic_inc(); match = true; } @@ -835,12 +869,26 @@ static int vhost_scsi_set_endpoint( if (match) { memcpy(vs->vs_vhost_wwpn, t->vhost_wwpn, sizeof(vs->vs_vhost_wwpn)); - vs->vs_endpoint = true; + for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { + vq = &vs->vqs[i]; + /* Flushing the vhost_work acts as synchronize_rcu */ + mutex_lock(&vq->mutex); + rcu_assign_pointer(vq->private_data, vs_tpg); + mutex_unlock(&vq->mutex); + } ret = 0; } else { ret = -EEXIST; } + /* + * Act as synchronize_rcu to make sure access to + * old vs->vs_tpg is finished. + */ + vhost_scsi_flush(vs); + kfree(vs->vs_tpg); + vs->vs_tpg = vs_tpg; + mutex_unlock(&vs->dev.mutex); return ret; } @@ -851,6 +899,8 @@ static int vhost_scsi_clear_endpoint( { struct tcm_vhost_tport *tv_tport; struct tcm_vhost_tpg *tv_tpg; + struct vhost_virtqueue *vq; + bool match = false; int index, ret, i; u8 target; @@ -862,9 +912,14 @@ static int vhost_scsi_clear_endpoint( goto err_dev; } } + + if (!vs->vs_tpg) { + mutex_unlock(&vs->dev.mutex); + return 0; + } + for (i = 0; i < VHOST_SCSI_MAX_TARGET; i++) { target = i; - tv_tpg = vs->vs_tpg[target]; if (!tv_tpg) continue; @@ -886,10 +941,27 @@ static int vhost_scsi_clear_endpoint( } tv_tpg->tv_tpg_vhost_count--; vs->vs_tpg[target] = NULL; - vs->vs_endpoint = false; + match = true; mutex_unlock(&tv_tpg->tv_tpg_mutex); } + if (match) { + for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { + vq = &vs->vqs[i]; + /* Flushing the vhost_work acts as synchronize_rcu */ + mutex_lock(&vq->mutex); + rcu_assign_pointer(vq->private_data, NULL); + mutex_unlock(&vq->mutex); + } + } + /* + * Act as synchronize_rcu to make sure access to + * old vs->vs_tpg is finished. + */ + vhost_scsi_flush(vs); + kfree(vs->vs_tpg); + vs->vs_tpg = NULL; mutex_unlock(&vs->dev.mutex); + return 0; err_tpg: @@ -899,6 +971,24 @@ err_dev: return ret; } +static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features) +{ + if (features & ~VHOST_SCSI_FEATURES) + return -EOPNOTSUPP; + + mutex_lock(&vs->dev.mutex); + if ((features & (1 << VHOST_F_LOG_ALL)) && + !vhost_log_access_ok(&vs->dev)) { + mutex_unlock(&vs->dev.mutex); + return -EFAULT; + } + vs->dev.acked_features = features; + smp_wmb(); + vhost_scsi_flush(vs); + mutex_unlock(&vs->dev.mutex); + return 0; +} + static int vhost_scsi_open(struct inode *inode, struct file *f) { struct vhost_scsi *s; @@ -939,38 +1029,6 @@ static int vhost_scsi_release(struct inode *inode, struct file *f) return 0; } -static void vhost_scsi_flush_vq(struct vhost_scsi *vs, int index) -{ - vhost_poll_flush(&vs->dev.vqs[index].poll); -} - -static void vhost_scsi_flush(struct vhost_scsi *vs) -{ - int i; - - for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) - vhost_scsi_flush_vq(vs, i); - vhost_work_flush(&vs->dev, &vs->vs_completion_work); -} - -static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features) -{ - if (features & ~VHOST_SCSI_FEATURES) - return -EOPNOTSUPP; - - mutex_lock(&vs->dev.mutex); - if ((features & (1 << VHOST_F_LOG_ALL)) && - !vhost_log_access_ok(&vs->dev)) { - mutex_unlock(&vs->dev.mutex); - return -EFAULT; - } - vs->dev.acked_features = features; - smp_wmb(); - vhost_scsi_flush(vs); - mutex_unlock(&vs->dev.mutex); - return 0; -} - static long vhost_scsi_ioctl(struct file *f, unsigned int ioctl, unsigned long arg) { From dfd5d5692c7ddf27380511b80a3dc590acfc4eee Mon Sep 17 00:00:00 2001 From: Asias He Date: Wed, 3 Apr 2013 14:17:38 +0800 Subject: [PATCH 122/303] tcm_vhost: Initialize vq->last_used_idx when set endpoint This patch fixes guest hang when booting seabios and guest. [ 0.576238] scsi0 : Virtio SCSI HBA [ 0.616754] virtio_scsi virtio1: request:id 0 is not a head! vq->last_used_idx is initialized only when /dev/vhost-scsi is opened or closed. vhost_scsi_open -> vhost_dev_init() -> vhost_vq_reset() vhost_scsi_release() -> vhost_dev_cleanup -> vhost_vq_reset() So, when guest talks to tcm_vhost after seabios does, vq->last_used_idx still contains the old valule for seabios. This confuses guest. Fix this by calling vhost_init_used() to init vq->last_used_idx when we set endpoint. Signed-off-by: Asias He Acked-by: Michael S. Tsirkin Signed-off-by: Nicholas Bellinger --- drivers/vhost/tcm_vhost.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c index 6cda137bb208..c127731b4230 100644 --- a/drivers/vhost/tcm_vhost.c +++ b/drivers/vhost/tcm_vhost.c @@ -874,6 +874,7 @@ static int vhost_scsi_set_endpoint( /* Flushing the vhost_work acts as synchronize_rcu */ mutex_lock(&vq->mutex); rcu_assign_pointer(vq->private_data, vs_tpg); + vhost_init_used(vq); mutex_unlock(&vq->mutex); } ret = 0; From 88c5b5ce5cb57af6ca2a7cf4d5715fa320448ff9 Mon Sep 17 00:00:00 2001 From: Michael Riesch Date: Mon, 8 Apr 2013 05:45:26 +0000 Subject: [PATCH 123/303] rtnetlink: Call nlmsg_parse() with correct header length Signed-off-by: Michael Riesch Cc: "David S. Miller" Cc: Greg Kroah-Hartman Cc: Jiri Benc Cc: "Theodore Ts'o" Cc: linux-kernel@vger.kernel.org Acked-by: Mark Rustad Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index b65441da74ab..23854b51a259 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1072,7 +1072,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); cb->seq = net->dev_base_seq; - if (nlmsg_parse(cb->nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX, + if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, ifla_policy) >= 0) { if (tb[IFLA_EXT_MASK]) @@ -1922,7 +1922,7 @@ static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh) u32 ext_filter_mask = 0; u16 min_ifinfo_dump_size = 0; - if (nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX, + if (nlmsg_parse(nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, ifla_policy) >= 0) { if (tb[IFLA_EXT_MASK]) ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); From f9c41a62bba3f3f7ef3541b2a025e3371bcbba97 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Sun, 7 Apr 2013 22:19:26 +0000 Subject: [PATCH 124/303] af_iucv: fix recvmsg by replacing skb_pull() function When receiving data messages, the "BUG_ON(skb->len < skb->data_len)" in the skb_pull() function triggers a kernel panic. Replace the skb_pull logic by a per skb offset as advised by Eric Dumazet. Signed-off-by: Ursula Braun Signed-off-by: Frank Blaschka Reviewed-by: Hendrik Brueckner Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/iucv/af_iucv.h | 8 ++++++++ net/iucv/af_iucv.c | 34 ++++++++++++++++------------------ 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h index cc7c19732389..714cc9a54a4c 100644 --- a/include/net/iucv/af_iucv.h +++ b/include/net/iucv/af_iucv.h @@ -130,6 +130,14 @@ struct iucv_sock { enum iucv_tx_notify n); }; +struct iucv_skb_cb { + u32 class; /* target class of message */ + u32 tag; /* tag associated with message */ + u32 offset; /* offset for skb receival */ +}; + +#define IUCV_SKB_CB(__skb) ((struct iucv_skb_cb *)&((__skb)->cb[0])) + /* iucv socket options (SOL_IUCV) */ #define SO_IPRMDATA_MSG 0x0080 /* send/recv IPRM_DATA msgs */ #define SO_MSGLIMIT 0x1000 /* get/set IUCV MSGLIMIT */ diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index bf6935820001..206ce6db2c36 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -49,12 +49,6 @@ static const u8 iprm_shutdown[8] = #define TRGCLS_SIZE (sizeof(((struct iucv_message *)0)->class)) -/* macros to set/get socket control buffer at correct offset */ -#define CB_TAG(skb) ((skb)->cb) /* iucv message tag */ -#define CB_TAG_LEN (sizeof(((struct iucv_message *) 0)->tag)) -#define CB_TRGCLS(skb) ((skb)->cb + CB_TAG_LEN) /* iucv msg target class */ -#define CB_TRGCLS_LEN (TRGCLS_SIZE) - #define __iucv_sock_wait(sk, condition, timeo, ret) \ do { \ DEFINE_WAIT(__wait); \ @@ -1141,7 +1135,7 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, /* increment and save iucv message tag for msg_completion cbk */ txmsg.tag = iucv->send_tag++; - memcpy(CB_TAG(skb), &txmsg.tag, CB_TAG_LEN); + IUCV_SKB_CB(skb)->tag = txmsg.tag; if (iucv->transport == AF_IUCV_TRANS_HIPER) { atomic_inc(&iucv->msg_sent); @@ -1224,7 +1218,7 @@ static int iucv_fragment_skb(struct sock *sk, struct sk_buff *skb, int len) return -ENOMEM; /* copy target class to control buffer of new skb */ - memcpy(CB_TRGCLS(nskb), CB_TRGCLS(skb), CB_TRGCLS_LEN); + IUCV_SKB_CB(nskb)->class = IUCV_SKB_CB(skb)->class; /* copy data fragment */ memcpy(nskb->data, skb->data + copied, size); @@ -1256,7 +1250,7 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb, /* store msg target class in the second 4 bytes of skb ctrl buffer */ /* Note: the first 4 bytes are reserved for msg tag */ - memcpy(CB_TRGCLS(skb), &msg->class, CB_TRGCLS_LEN); + IUCV_SKB_CB(skb)->class = msg->class; /* check for special IPRM messages (e.g. iucv_sock_shutdown) */ if ((msg->flags & IUCV_IPRMDATA) && len > 7) { @@ -1292,6 +1286,7 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb, } } + IUCV_SKB_CB(skb)->offset = 0; if (sock_queue_rcv_skb(sk, skb)) skb_queue_head(&iucv_sk(sk)->backlog_skb_q, skb); } @@ -1327,6 +1322,7 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, unsigned int copied, rlen; struct sk_buff *skb, *rskb, *cskb; int err = 0; + u32 offset; msg->msg_namelen = 0; @@ -1348,13 +1344,14 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, return err; } - rlen = skb->len; /* real length of skb */ + offset = IUCV_SKB_CB(skb)->offset; + rlen = skb->len - offset; /* real length of skb */ copied = min_t(unsigned int, rlen, len); if (!rlen) sk->sk_shutdown = sk->sk_shutdown | RCV_SHUTDOWN; cskb = skb; - if (skb_copy_datagram_iovec(cskb, 0, msg->msg_iov, copied)) { + if (skb_copy_datagram_iovec(cskb, offset, msg->msg_iov, copied)) { if (!(flags & MSG_PEEK)) skb_queue_head(&sk->sk_receive_queue, skb); return -EFAULT; @@ -1372,7 +1369,8 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, * get the trgcls from the control buffer of the skb due to * fragmentation of original iucv message. */ err = put_cmsg(msg, SOL_IUCV, SCM_IUCV_TRGCLS, - CB_TRGCLS_LEN, CB_TRGCLS(skb)); + sizeof(IUCV_SKB_CB(skb)->class), + (void *)&IUCV_SKB_CB(skb)->class); if (err) { if (!(flags & MSG_PEEK)) skb_queue_head(&sk->sk_receive_queue, skb); @@ -1384,9 +1382,8 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, /* SOCK_STREAM: re-queue skb if it contains unreceived data */ if (sk->sk_type == SOCK_STREAM) { - skb_pull(skb, copied); - if (skb->len) { - skb_queue_head(&sk->sk_receive_queue, skb); + if (copied < rlen) { + IUCV_SKB_CB(skb)->offset = offset + copied; goto done; } } @@ -1405,6 +1402,7 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, spin_lock_bh(&iucv->message_q.lock); rskb = skb_dequeue(&iucv->backlog_skb_q); while (rskb) { + IUCV_SKB_CB(rskb)->offset = 0; if (sock_queue_rcv_skb(sk, rskb)) { skb_queue_head(&iucv->backlog_skb_q, rskb); @@ -1832,7 +1830,7 @@ static void iucv_callback_txdone(struct iucv_path *path, spin_lock_irqsave(&list->lock, flags); while (list_skb != (struct sk_buff *)list) { - if (!memcmp(&msg->tag, CB_TAG(list_skb), CB_TAG_LEN)) { + if (msg->tag != IUCV_SKB_CB(list_skb)->tag) { this = list_skb; break; } @@ -2093,6 +2091,7 @@ static int afiucv_hs_callback_rx(struct sock *sk, struct sk_buff *skb) skb_pull(skb, sizeof(struct af_iucv_trans_hdr)); skb_reset_transport_header(skb); skb_reset_network_header(skb); + IUCV_SKB_CB(skb)->offset = 0; spin_lock(&iucv->message_q.lock); if (skb_queue_empty(&iucv->backlog_skb_q)) { if (sock_queue_rcv_skb(sk, skb)) { @@ -2197,8 +2196,7 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev, /* fall through and receive zero length data */ case 0: /* plain data frame */ - memcpy(CB_TRGCLS(skb), &trans_hdr->iucv_hdr.class, - CB_TRGCLS_LEN); + IUCV_SKB_CB(skb)->class = trans_hdr->iucv_hdr.class; err = afiucv_hs_callback_rx(sk, skb); break; default: From 65d8013cbdc661f9cb7645148de1facfe3d0c88a Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Sun, 7 Apr 2013 22:19:27 +0000 Subject: [PATCH 125/303] qeth: fix qeth_wait_for_threads() deadlock for OSN devices Any recovery thread will deadlock when calling qeth_wait_for_threads(), most notably when triggering a recovery on an OSN device. This patch will store the recovery thread's task pointer on recovery invocation and check in qeth_wait_for_threads() respectively to avoid deadlocks. Signed-off-by: Stefan Raspl Signed-off-by: Frank Blaschka Reviewed-by: Ursula Braun Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core.h | 3 +++ drivers/s390/net/qeth_core_main.c | 19 +++++++++++++++++++ drivers/s390/net/qeth_l2_main.c | 2 ++ drivers/s390/net/qeth_l3_main.c | 2 ++ 4 files changed, 26 insertions(+) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 8c0622399fcd..6ccb7457746b 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -769,6 +769,7 @@ struct qeth_card { unsigned long thread_start_mask; unsigned long thread_allowed_mask; unsigned long thread_running_mask; + struct task_struct *recovery_task; spinlock_t ip_lock; struct list_head ip_list; struct list_head *ip_tbd_list; @@ -862,6 +863,8 @@ extern struct qeth_card_list_struct qeth_core_card_list; extern struct kmem_cache *qeth_core_header_cache; extern struct qeth_dbf_info qeth_dbf[QETH_DBF_INFOS]; +void qeth_set_recovery_task(struct qeth_card *); +void qeth_clear_recovery_task(struct qeth_card *); void qeth_set_allowed_threads(struct qeth_card *, unsigned long , int); int qeth_threads_running(struct qeth_card *, unsigned long); int qeth_wait_for_threads(struct qeth_card *, unsigned long); diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 0d73a999983d..451f92020599 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -177,6 +177,23 @@ const char *qeth_get_cardname_short(struct qeth_card *card) return "n/a"; } +void qeth_set_recovery_task(struct qeth_card *card) +{ + card->recovery_task = current; +} +EXPORT_SYMBOL_GPL(qeth_set_recovery_task); + +void qeth_clear_recovery_task(struct qeth_card *card) +{ + card->recovery_task = NULL; +} +EXPORT_SYMBOL_GPL(qeth_clear_recovery_task); + +static bool qeth_is_recovery_task(const struct qeth_card *card) +{ + return card->recovery_task == current; +} + void qeth_set_allowed_threads(struct qeth_card *card, unsigned long threads, int clear_start_mask) { @@ -205,6 +222,8 @@ EXPORT_SYMBOL_GPL(qeth_threads_running); int qeth_wait_for_threads(struct qeth_card *card, unsigned long threads) { + if (qeth_is_recovery_task(card)) + return 0; return wait_event_interruptible(card->wait_q, qeth_threads_running(card, threads) == 0); } diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index d690166efeaf..155b101bd730 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1143,6 +1143,7 @@ static int qeth_l2_recover(void *ptr) QETH_CARD_TEXT(card, 2, "recover2"); dev_warn(&card->gdev->dev, "A recovery process has been started for the device\n"); + qeth_set_recovery_task(card); __qeth_l2_set_offline(card->gdev, 1); rc = __qeth_l2_set_online(card->gdev, 1); if (!rc) @@ -1153,6 +1154,7 @@ static int qeth_l2_recover(void *ptr) dev_warn(&card->gdev->dev, "The qeth device driver " "failed to recover an error on the device\n"); } + qeth_clear_recovery_task(card); qeth_clear_thread_start_bit(card, QETH_RECOVER_THREAD); qeth_clear_thread_running_bit(card, QETH_RECOVER_THREAD); return 0; diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 8710337dab3e..1f7edf1b26c3 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -3515,6 +3515,7 @@ static int qeth_l3_recover(void *ptr) QETH_CARD_TEXT(card, 2, "recover2"); dev_warn(&card->gdev->dev, "A recovery process has been started for the device\n"); + qeth_set_recovery_task(card); __qeth_l3_set_offline(card->gdev, 1); rc = __qeth_l3_set_online(card->gdev, 1); if (!rc) @@ -3525,6 +3526,7 @@ static int qeth_l3_recover(void *ptr) dev_warn(&card->gdev->dev, "The qeth device driver " "failed to recover an error on the device\n"); } + qeth_clear_recovery_task(card); qeth_clear_thread_start_bit(card, QETH_RECOVER_THREAD); qeth_clear_thread_running_bit(card, QETH_RECOVER_THREAD); return 0; From 79e5f05edcbf85825d19eb8a425cd6c36c6c66f1 Mon Sep 17 00:00:00 2001 From: Christian Ruppert Date: Mon, 8 Apr 2013 13:05:30 +0530 Subject: [PATCH 126/303] ARC: Add implicit compiler barrier to raw_local_irq* functions ARC irqsave/restore macros were missing the compiler barrier, causing a stale load in irq-enabled region be used in irq-safe region, despite being changed, because the register holding the value was still live. The problem manifested as random crashes in timer code when stress testing ARCLinux (3.9-rc3) on a !SMP && !PREEMPT_COUNT Here's the exact sequence which caused this: (0). tv1[x] <----> t1 <---> t2 (1). mod_timer(t1) interrupted after it calls timer_pending() (2). mod_timer(t2) completes (3). mod_timer(t1) resumes but messes up the list (4). __runt_timers( ) uses bogus timer_list entry / crashes in timer->function Essentially mod_timer() was racing against itself and while the spinlock serialized the tv1[] timer link list, timer_pending() called outside the spinlock, cached timer link list element in a register. With low register pressure (and a deep register file), lack of barrier in raw_local_irqsave() as well as preempt_disable (!PREEMPT_COUNT version), there was nothing to force gcc to reload across the spinlock, causing a stale value in reg be used for link list manipulation - ensuing a corruption. ARcompact disassembly which shows the culprit generated code: mod_timer: push_s blink mov_s r13,r0 # timer, timer .. ###### timer_pending( ) ld_s r3,[r13] # <------ .entry.next LOADED brne r3, 0, @.L163 .L163: .. ###### spin_lock_irq( ) lr r5, [status32] # flags bic r4, r5, 6 # temp, flags, and.f 0, r5, 6 # flags, flag.nz r4 ###### detach_if_pending( ) begins tst_s r3,r3 <-------------- # timer_pending( ) checks timer->entry.next # r3 is NOT reloaded by gcc, using stale value beq.d @.L169 mov.eq r0,0 ##### detach_timer( ): __list_del( ) ld r4,[r13,4] # .entry.prev, D.31439 st r4,[r3,4] # .prev, D.31439 st r3,[r4] # .next, D.30246 We initially tried to fix this by adding barrier() to preempt_* macros for !PREEMPT_COUNT but Linus clarified that it was anything but wrong. http://www.spinics.net/lists/kernel/msg1512709.html [vgupta: updated commitlog] Reported-by/Signed-off-by: Christian Ruppert Cc: Christian Ruppert Cc: Pierrick Hascoet Debugged-by/Signed-off-by: Vineet Gupta Signed-off-by: Linus Torvalds --- arch/arc/include/asm/irqflags.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/arc/include/asm/irqflags.h b/arch/arc/include/asm/irqflags.h index ccd84806b62f..eac071668201 100644 --- a/arch/arc/include/asm/irqflags.h +++ b/arch/arc/include/asm/irqflags.h @@ -39,7 +39,7 @@ static inline long arch_local_irq_save(void) " flag.nz %0 \n" : "=r"(temp), "=r"(flags) : "n"((STATUS_E1_MASK | STATUS_E2_MASK)) - : "cc"); + : "memory", "cc"); return flags; } @@ -53,7 +53,8 @@ static inline void arch_local_irq_restore(unsigned long flags) __asm__ __volatile__( " flag %0 \n" : - : "r"(flags)); + : "r"(flags) + : "memory"); } /* @@ -73,7 +74,8 @@ static inline void arch_local_irq_disable(void) " and %0, %0, %1 \n" " flag %0 \n" : "=&r"(temp) - : "n"(~(STATUS_E1_MASK | STATUS_E2_MASK))); + : "n"(~(STATUS_E1_MASK | STATUS_E2_MASK)) + : "memory"); } /* @@ -85,7 +87,9 @@ static inline long arch_local_save_flags(void) __asm__ __volatile__( " lr %0, [status32] \n" - : "=&r"(temp)); + : "=&r"(temp) + : + : "memory"); return temp; } From b1ea3e6e3883c5191caa5be72b3a5b9c32812e38 Mon Sep 17 00:00:00 2001 From: Calvin Owens Date: Sun, 7 Apr 2013 21:01:19 -0500 Subject: [PATCH 127/303] drm/nouveau: fix unconditional return waiting on memory Typo in nv50_display_flip_wait allows page flipping to run ahead before memory has time to settle. Signed-off-by: Calvin Owens Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nv50_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c index 7f0e6c3f37d1..1ddc03e51bf4 100644 --- a/drivers/gpu/drm/nouveau/nv50_display.c +++ b/drivers/gpu/drm/nouveau/nv50_display.c @@ -479,7 +479,7 @@ nv50_display_flip_wait(void *data) { struct nv50_display_flip *flip = data; if (nouveau_bo_rd32(flip->disp->sync, flip->chan->addr / 4) == - flip->chan->data); + flip->chan->data) return true; usleep_range(1, 2); return false; From c802d759623acbd6e1ee9fbdabae89159a513913 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 9 Apr 2013 10:07:19 +0800 Subject: [PATCH 128/303] netrom: fix invalid use of sizeof in nr_recvmsg() sizeof() when applied to a pointer typed expression gives the size of the pointer, not that of the pointed data. Introduced by commit 3ce5ef(netrom: fix info leak via msg_name in nr_recvmsg) Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/netrom/af_netrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 7fcb307dea47..103bd704b5fc 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1173,7 +1173,7 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock, } if (sax != NULL) { - memset(sax, 0, sizeof(sax)); + memset(sax, 0, sizeof(*sax)); sax->sax25_family = AF_NETROM; skb_copy_from_linear_data_offset(skb, 7, sax->sax25_call.ax25_call, AX25_ADDR_LEN); From b2dfaa8d33cee9dd4ed78979f5d70063df546101 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Mon, 8 Apr 2013 14:21:31 +1000 Subject: [PATCH 129/303] m68k: define a local gpio_request_one() function Compiling for linux-3.9-rc1 and later fails with: drivers/gpio/devres.c: In function 'devm_gpio_request_one': drivers/gpio/devres.c:90:2: error: implicit declaration of function 'gpio_request_one' [-Werror=implicit-function-declaration] So provide a local gpio_request_one() function. Code largely borrowed from blackfin's local gpio_request_one() function. Signed-off-by: Greg Ungerer Acked-by: Linus Walleij --- arch/m68k/include/asm/gpio.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/m68k/include/asm/gpio.h b/arch/m68k/include/asm/gpio.h index 4395ffc51fdb..8cc83431805b 100644 --- a/arch/m68k/include/asm/gpio.h +++ b/arch/m68k/include/asm/gpio.h @@ -86,4 +86,24 @@ static inline int gpio_cansleep(unsigned gpio) return gpio < MCFGPIO_PIN_MAX ? 0 : __gpio_cansleep(gpio); } +static inline int gpio_request_one(unsigned gpio, unsigned long flags, const char *label) +{ + int err; + + err = gpio_request(gpio, label); + if (err) + return err; + + if (flags & GPIOF_DIR_IN) + err = gpio_direction_input(gpio); + else + err = gpio_direction_output(gpio, + (flags & GPIOF_INIT_HIGH) ? 1 : 0); + + if (err) + gpio_free(gpio); + + return err; +} + #endif From 3480a2125923e4b7a56d79efc76743089bf273fc Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 9 Apr 2013 14:16:04 +0800 Subject: [PATCH 130/303] can: gw: use kmem_cache_free() instead of kfree() Memory allocated by kmem_cache_alloc() should be freed using kmem_cache_free(), not kfree(). Cc: linux-stable # >= v3.2 Signed-off-by: Wei Yongjun Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- net/can/gw.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/can/gw.c b/net/can/gw.c index 2d117dc5ebea..117814a7e73c 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -466,7 +466,7 @@ static int cgw_notifier(struct notifier_block *nb, if (gwj->src.dev == dev || gwj->dst.dev == dev) { hlist_del(&gwj->list); cgw_unregister_filter(gwj); - kfree(gwj); + kmem_cache_free(cgw_cache, gwj); } } } @@ -864,7 +864,7 @@ static void cgw_remove_all_jobs(void) hlist_for_each_entry_safe(gwj, nx, &cgw_list, list) { hlist_del(&gwj->list); cgw_unregister_filter(gwj); - kfree(gwj); + kmem_cache_free(cgw_cache, gwj); } } @@ -920,7 +920,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) hlist_del(&gwj->list); cgw_unregister_filter(gwj); - kfree(gwj); + kmem_cache_free(cgw_cache, gwj); err = 0; break; } From a6e4d5a03e9e3587e88aba687d8f225f4f04c792 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 25 Mar 2013 09:14:30 +0000 Subject: [PATCH 131/303] x86, efivars: firmware bug workarounds should be in platform code Let's not burden ia64 with checks in the common efivars code that we're not writing too much data to the variable store. That kind of thing is an x86 firmware bug, plain and simple. efi_query_variable_store() provides platforms with a wrapper in which they can perform checks and workarounds for EFI variable storage bugs. Cc: H. Peter Anvin Cc: Matthew Garrett Signed-off-by: Matt Fleming --- arch/x86/platform/efi/efi.c | 25 +++++++++++++++++++++++++ drivers/firmware/efivars.c | 18 +++--------------- include/linux/efi.h | 9 ++++++++- 3 files changed, 36 insertions(+), 16 deletions(-) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 5f2ecaf3f9d8..c89c245eff40 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -999,3 +999,28 @@ u64 efi_mem_attributes(unsigned long phys_addr) } return 0; } + +/* + * Some firmware has serious problems when using more than 50% of the EFI + * variable store, i.e. it triggers bugs that can brick machines. Ensure that + * we never use more than this safe limit. + * + * Return EFI_SUCCESS if it is safe to write 'size' bytes to the variable + * store. + */ +efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) +{ + efi_status_t status; + u64 storage_size, remaining_size, max_size; + + status = efi.query_variable_info(attributes, &storage_size, + &remaining_size, &max_size); + if (status != EFI_SUCCESS) + return status; + + if (!storage_size || size > remaining_size || size > max_size || + (remaining_size - size) < (storage_size / 2)) + return EFI_OUT_OF_RESOURCES; + + return EFI_SUCCESS; +} diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index 7acafb80fd4c..bf15d81d74e1 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -436,24 +436,12 @@ static efi_status_t check_var_size_locked(struct efivars *efivars, u32 attributes, unsigned long size) { - u64 storage_size, remaining_size, max_size; - efi_status_t status; const struct efivar_operations *fops = efivars->ops; - if (!efivars->ops->query_variable_info) + if (!efivars->ops->query_variable_store) return EFI_UNSUPPORTED; - status = fops->query_variable_info(attributes, &storage_size, - &remaining_size, &max_size); - - if (status != EFI_SUCCESS) - return status; - - if (!storage_size || size > remaining_size || size > max_size || - (remaining_size - size) < (storage_size / 2)) - return EFI_OUT_OF_RESOURCES; - - return status; + return fops->query_variable_store(attributes, size); } @@ -2131,7 +2119,7 @@ efivars_init(void) ops.get_variable = efi.get_variable; ops.set_variable = efi.set_variable; ops.get_next_variable = efi.get_next_variable; - ops.query_variable_info = efi.query_variable_info; + ops.query_variable_store = efi_query_variable_store; error = register_efivars(&__efivars, &ops, efi_kobj); if (error) diff --git a/include/linux/efi.h b/include/linux/efi.h index 9bf2f1fcae27..3d7df3d32c66 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -333,6 +333,7 @@ typedef efi_status_t efi_query_capsule_caps_t(efi_capsule_header_t **capsules, unsigned long count, u64 *max_size, int *reset_type); +typedef efi_status_t efi_query_variable_store_t(u32 attributes, unsigned long size); /* * EFI Configuration Table and GUID definitions @@ -575,9 +576,15 @@ extern void efi_enter_virtual_mode (void); /* switch EFI to virtual mode, if pos #ifdef CONFIG_X86 extern void efi_late_init(void); extern void efi_free_boot_services(void); +extern efi_status_t efi_query_variable_store(u32 attributes, unsigned long size); #else static inline void efi_late_init(void) {} static inline void efi_free_boot_services(void) {} + +static inline efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) +{ + return EFI_SUCCESS; +} #endif extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr); extern u64 efi_get_iobase (void); @@ -731,7 +738,7 @@ struct efivar_operations { efi_get_variable_t *get_variable; efi_get_next_variable_t *get_next_variable; efi_set_variable_t *set_variable; - efi_query_variable_info_t *query_variable_info; + efi_query_variable_store_t *query_variable_store; }; struct efivars { From cab1e0a36c9dd0b0671fb84197ed294513f5adc1 Mon Sep 17 00:00:00 2001 From: Markus Pargmann Date: Fri, 29 Mar 2013 16:20:09 +0100 Subject: [PATCH 132/303] ARM: clk-imx35: Bugfix iomux clock This patch enables iomuxc_gate clock. It is necessary to be able to reconfigure iomux pads. Without this clock enabled, the clk_disable_unused function will disable this clock and the iomux pads are not configurable anymore. This happens at every boot. After a reboot (watchdog system reset) the clock is not enabled again, so all iomux pad reconfigurations in boot code are without effect. The iomux pads should be always configurable, so this patch always enables it. Signed-off-by: Markus Pargmann Cc: stable@vger.kernel.org Signed-off-by: Sascha Hauer Signed-off-by: Shawn Guo --- arch/arm/mach-imx/clk-imx35.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-imx/clk-imx35.c b/arch/arm/mach-imx/clk-imx35.c index e13a8fa5e62c..b95898a3912c 100644 --- a/arch/arm/mach-imx/clk-imx35.c +++ b/arch/arm/mach-imx/clk-imx35.c @@ -265,6 +265,7 @@ int __init mx35_clocks_init(void) clk_prepare_enable(clk[iim_gate]); clk_prepare_enable(clk[emi_gate]); clk_prepare_enable(clk[max_gate]); + clk_prepare_enable(clk[iomuxc_gate]); /* * SCC is needed to boot via mmc after a watchdog reset. The clock code From 75498083e25e96932ad998ffdeadb17234c68d3a Mon Sep 17 00:00:00 2001 From: Markus Pargmann Date: Fri, 29 Mar 2013 16:20:10 +0100 Subject: [PATCH 133/303] ARM: imx35 Bugfix admux clock The admux clock seems to be the audmux clock as tests show. audmux does not work without this clock enabled. Currently imx35 does not register a clock device for audmux. This patch adds this registration. imx-audmux driver already handles a clock device, so no changes are necessary there. Signed-off-by: Markus Pargmann Cc: stable@vger.kernel.org Signed-off-by: Sascha Hauer Signed-off-by: Shawn Guo --- arch/arm/mach-imx/clk-imx35.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-imx/clk-imx35.c b/arch/arm/mach-imx/clk-imx35.c index b95898a3912c..2193c834f55c 100644 --- a/arch/arm/mach-imx/clk-imx35.c +++ b/arch/arm/mach-imx/clk-imx35.c @@ -257,6 +257,7 @@ int __init mx35_clocks_init(void) clk_register_clkdev(clk[wdog_gate], NULL, "imx2-wdt.0"); clk_register_clkdev(clk[nfc_div], NULL, "imx25-nand.0"); clk_register_clkdev(clk[csi_gate], NULL, "mx3-camera.0"); + clk_register_clkdev(clk[admux_gate], "audmux", NULL); clk_prepare_enable(clk[spba_gate]); clk_prepare_enable(clk[gpio1_gate]); From 2bb4b70b1dbb45f0c1a3ba98066e6635d8aa3fe0 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Wed, 3 Apr 2013 23:50:09 +0800 Subject: [PATCH 134/303] ARM: imx: provide twd clock lookup from device tree While booting from device tree, imx6q used to provide twd clock lookup by calling clk_register_clkdev() in clock driver. However, the commit bd60345 (ARM: use device tree to get smp_twd clock) forces DT boot to look up the clock from device tree. It causes the failure below when twd driver tries to get the clock, and hence kernel has to calibrate the local timer frequency. smp_twd: clock not found -2 ... Calibrating local timer... 396.13MHz. Fix the regression by providing twd clock lookup from device tree, and remove the unused twd clk_register_clkdev() call from clock driver. Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6qdl.dtsi | 1 + arch/arm/mach-imx/clk-imx6q.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi index 06ec460b4581..281a223591ff 100644 --- a/arch/arm/boot/dts/imx6qdl.dtsi +++ b/arch/arm/boot/dts/imx6qdl.dtsi @@ -91,6 +91,7 @@ compatible = "arm,cortex-a9-twd-timer"; reg = <0x00a00600 0x20>; interrupts = <1 13 0xf01>; + clocks = <&clks 15>; }; L2: l2-cache@00a02000 { diff --git a/arch/arm/mach-imx/clk-imx6q.c b/arch/arm/mach-imx/clk-imx6q.c index 2f9ff93a4e61..22a3021a455b 100644 --- a/arch/arm/mach-imx/clk-imx6q.c +++ b/arch/arm/mach-imx/clk-imx6q.c @@ -443,7 +443,6 @@ int __init mx6q_clocks_init(void) clk_register_clkdev(clk[gpt_ipg], "ipg", "imx-gpt.0"); clk_register_clkdev(clk[gpt_ipg_per], "per", "imx-gpt.0"); - clk_register_clkdev(clk[twd], NULL, "smp_twd"); clk_register_clkdev(clk[cko1_sel], "cko1_sel", NULL); clk_register_clkdev(clk[ahb], "ahb", NULL); clk_register_clkdev(clk[cko1], "cko1", NULL); From e8094b2c17126c7dfdeafa296f206a4a3b122d23 Mon Sep 17 00:00:00 2001 From: Dirk Behme Date: Thu, 4 Apr 2013 16:03:29 +0200 Subject: [PATCH 135/303] ARM i.MX6: Fix ldb_di clock selection According to the recent i.MX6 Quad technical reference manual, mode 0x4 (100b) of the CCM_CS2DCR register (address 0x020C402C) bits [11-9] and [14-12] select the PLL3 clock, and not the PLL3 PFD1 540M clock. In our code, the PLL3 root clock is named 'pll3_usb_otg', select this instead of the 540M clock. Signed-off-by: Dirk Behme Signed-off-by: Shawn Guo --- arch/arm/mach-imx/clk-imx6q.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-imx/clk-imx6q.c b/arch/arm/mach-imx/clk-imx6q.c index 22a3021a455b..d38e54f5b6d7 100644 --- a/arch/arm/mach-imx/clk-imx6q.c +++ b/arch/arm/mach-imx/clk-imx6q.c @@ -115,7 +115,7 @@ static const char *gpu2d_core_sels[] = { "axi", "pll3_usb_otg", "pll2_pfd0_352m" static const char *gpu3d_core_sels[] = { "mmdc_ch0_axi", "pll3_usb_otg", "pll2_pfd1_594m", "pll2_pfd2_396m", }; static const char *gpu3d_shader_sels[] = { "mmdc_ch0_axi", "pll3_usb_otg", "pll2_pfd1_594m", "pll2_pfd9_720m", }; static const char *ipu_sels[] = { "mmdc_ch0_axi", "pll2_pfd2_396m", "pll3_120m", "pll3_pfd1_540m", }; -static const char *ldb_di_sels[] = { "pll5_video", "pll2_pfd0_352m", "pll2_pfd2_396m", "mmdc_ch1_axi", "pll3_pfd1_540m", }; +static const char *ldb_di_sels[] = { "pll5_video", "pll2_pfd0_352m", "pll2_pfd2_396m", "mmdc_ch1_axi", "pll3_usb_otg", }; static const char *ipu_di_pre_sels[] = { "mmdc_ch0_axi", "pll3_usb_otg", "pll5_video", "pll2_pfd0_352m", "pll2_pfd2_396m", "pll3_pfd1_540m", }; static const char *ipu1_di0_sels[] = { "ipu1_di0_pre", "dummy", "dummy", "ldb_di0", "ldb_di1", }; static const char *ipu1_di1_sels[] = { "ipu1_di1_pre", "dummy", "dummy", "ldb_di0", "ldb_di1", }; From b530f742ac27460d41d35b638ad6aad92044a982 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Mon, 8 Apr 2013 21:39:45 +0900 Subject: [PATCH 136/303] ARM: S3C24XX: Correct NR_IRQS definition for s3c2440 Due to NR_IRQS being incorrectly defined not all IRQ domains can be registered for S3C2440. It causes following errors on a s3c2440 SoC based board: NR_IRQS:89 S3C2440: IRQ Support irq: clearing pending status 00000002 ------------[ cut here ]------------ WARNING: at kernel/irq/irqdomain.c:234 0xc0056ed0() ... irq: could not create irq-domain ... s3c2410-wdt s3c2410-wdt: failed to install irq (-22) s3c2410-wdt: probe of s3c2410-wdt failed with error -22 ... samsung-uart s3c2440-uart.0: cannot get irq 74 Fix this by increasing NR_IRQS to at least (IRQ_S3C2443_AC97 + 1) if CPU_S3C2440 is selected, so the subintc IRQ domain gets properly registered. Signed-off-by: Tomasz Figa Signed-off-by: Sylwester Nawrocki Acked-by: Heiko Stuebner Signed-off-by: Kukjin Kim --- arch/arm/mach-s3c24xx/include/mach/irqs.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arm/mach-s3c24xx/include/mach/irqs.h b/arch/arm/mach-s3c24xx/include/mach/irqs.h index b7a9f4d469e8..1e73f5fa8659 100644 --- a/arch/arm/mach-s3c24xx/include/mach/irqs.h +++ b/arch/arm/mach-s3c24xx/include/mach/irqs.h @@ -188,10 +188,8 @@ #if defined(CONFIG_CPU_S3C2416) #define NR_IRQS (IRQ_S3C2416_I2S1 + 1) -#elif defined(CONFIG_CPU_S3C2443) -#define NR_IRQS (IRQ_S3C2443_AC97+1) #else -#define NR_IRQS (IRQ_S3C2440_AC97+1) +#define NR_IRQS (IRQ_S3C2443_AC97 + 1) #endif /* compatibility define. */ From 646dd2f0a980949b05042792fbadd72b735c3eda Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Tue, 9 Apr 2013 23:52:21 +0900 Subject: [PATCH 137/303] ARM: S3C24XX: Fix interrupt pending register offset of the EINT controller The external pending interrupt register address (EINTPEND) offset is 0xa8, not 0x08. Without this patch the external interrupts are not properly acknowledged, which may lead to an interrupt storm and the system hang as soon as any external interrupt is requested. Signed-off-by: Sylwester Nawrocki Reviewed-by: Heiko Stuebner Signed-off-by: Kukjin Kim --- arch/arm/mach-s3c24xx/irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-s3c24xx/irq.c b/arch/arm/mach-s3c24xx/irq.c index cb9f5e011e73..d8ba9bee4c7e 100644 --- a/arch/arm/mach-s3c24xx/irq.c +++ b/arch/arm/mach-s3c24xx/irq.c @@ -500,7 +500,7 @@ struct s3c_irq_intc *s3c24xx_init_intc(struct device_node *np, base = (void *)0xfd000000; intc->reg_mask = base + 0xa4; - intc->reg_pending = base + 0x08; + intc->reg_pending = base + 0xa8; irq_num = 20; irq_start = S3C2410_IRQ(32); irq_offset = 4; From f1ca493b0b5e8f42d3b2dc8877860db2983f47b6 Mon Sep 17 00:00:00 2001 From: Alban Bedel Date: Tue, 9 Apr 2013 17:13:59 +0200 Subject: [PATCH 138/303] ASoC: wm8903: Fix the bypass to HP/LINEOUT when no DAC or ADC is running The Charge Pump needs the DSP clock to work properly, without it the bypass to HP/LINEOUT is not working properly. This requirement is not mentioned in the datasheet but has been confirmed by Mark Brown from Wolfson. Signed-off-by: Alban Bedel Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/codecs/wm8903.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c index 134e41c870b9..f8a31ad0b203 100644 --- a/sound/soc/codecs/wm8903.c +++ b/sound/soc/codecs/wm8903.c @@ -1083,6 +1083,8 @@ static const struct snd_soc_dapm_route wm8903_intercon[] = { { "ROP", NULL, "Right Speaker PGA" }, { "RON", NULL, "Right Speaker PGA" }, + { "Charge Pump", NULL, "CLK_DSP" }, + { "Left Headphone Output PGA", NULL, "Charge Pump" }, { "Right Headphone Output PGA", NULL, "Charge Pump" }, { "Left Line Output PGA", NULL, "Charge Pump" }, From f6f629f8332ea70255f6c60c904270640a21a114 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 5 Apr 2013 13:19:26 +0100 Subject: [PATCH 139/303] ASoC: wm5102: Correct lookup of arizona struct in SYSCLK event Reported-by: Ryo Tsutsui Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/codecs/wm5102.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm5102.c b/sound/soc/codecs/wm5102.c index b82bbf584146..34d0201d6a78 100644 --- a/sound/soc/codecs/wm5102.c +++ b/sound/soc/codecs/wm5102.c @@ -584,7 +584,7 @@ static int wm5102_sysclk_ev(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event) { struct snd_soc_codec *codec = w->codec; - struct arizona *arizona = dev_get_drvdata(codec->dev); + struct arizona *arizona = dev_get_drvdata(codec->dev->parent); struct regmap *regmap = codec->control_data; const struct reg_default *patch = NULL; int i, patch_size; From 51a246aa5c0a14b3d34a5c6d3c9e271c784b127f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 9 Apr 2013 18:03:25 +0100 Subject: [PATCH 140/303] regmap: Back out work buffer fix This reverts commit bc8ce4 (regmap: don't corrupt work buffer in _regmap_raw_write()) since it turns out that it can cause issues when taken in isolation from the other changes in -next that lead to its discovery. On the basis that nobody noticed the problems for quite some time without that subsequent work let's drop it from v3.9. Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index d34adef1e63e..58cfb3232428 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -943,7 +943,8 @@ static int _regmap_raw_write(struct regmap *map, unsigned int reg, unsigned int ival; int val_bytes = map->format.val_bytes; for (i = 0; i < val_len / val_bytes; i++) { - ival = map->format.parse_val(val + (i * val_bytes)); + memcpy(map->work_buf, val + (i * val_bytes), val_bytes); + ival = map->format.parse_val(map->work_buf); ret = regcache_write(map, reg + (i * map->reg_stride), ival); if (ret) { From 3e2e0d2c222bdf5bafd722dec1618fa6073ef372 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 9 Apr 2013 12:33:07 -0400 Subject: [PATCH 141/303] tile: comment assumption about __insn_mtspr for The arch_local_irq_save(), etc., routines are required to function as compiler barriers. They do, but it's subtle and requires knowing that the gcc builtin __insn_mtspr() is marked as a memory clobber. Provide a comment explaining the assumption. Signed-off-by: Chris Metcalf [ This came about from me wondering about the synchronization rules of __insn_mtspr() - Linus ] Signed-off-by: Linus Torvalds --- arch/tile/include/asm/irqflags.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/tile/include/asm/irqflags.h b/arch/tile/include/asm/irqflags.h index 241c0bb60b12..c96f9bbb760d 100644 --- a/arch/tile/include/asm/irqflags.h +++ b/arch/tile/include/asm/irqflags.h @@ -40,7 +40,15 @@ #include #include -/* Set and clear kernel interrupt masks. */ +/* + * Set and clear kernel interrupt masks. + * + * NOTE: __insn_mtspr() is a compiler builtin marked as a memory + * clobber. We rely on it being equivalent to a compiler barrier in + * this code since arch_local_irq_save() and friends must act as + * compiler barriers. This compiler semantic is baked into enough + * places that the compiler will maintain it going forward. + */ #if CHIP_HAS_SPLIT_INTR_MASK() #if INT_PERF_COUNT < 32 || INT_AUX_PERF_COUNT < 32 || INT_MEM_ERROR >= 32 # error Fix assumptions about which word various interrupts are in From fa332941c0c7c00e3420078268b7558d0ef792b5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 9 Apr 2013 12:56:52 -0400 Subject: [PATCH 142/303] NFSv4: Fix another potential state manager deadlock Don't hold the NFSv4 sequence id while we check for open permission. The call to ACCESS may block due to reboot recovery. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 26431cf62ddb..0ad025eb523b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1046,6 +1046,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) /* Save the delegation */ nfs4_stateid_copy(&stateid, &delegation->stateid); rcu_read_unlock(); + nfs_release_seqid(opendata->o_arg.seqid); ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode); if (ret != 0) goto out; From ca10b9e9a8ca7342ee07065289cbe74ac128c169 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 8 Apr 2013 17:58:11 +0000 Subject: [PATCH 143/303] selinux: add a skb_owned_by() hook Commit 90ba9b1986b5ac (tcp: tcp_make_synack() can use alloc_skb()) broke certain SELinux/NetLabel configurations by no longer correctly assigning the sock to the outgoing SYNACK packet. Cost of atomic operations on the LISTEN socket is quite big, and we would like it to happen only if really needed. This patch introduces a new security_ops->skb_owned_by() method, that is a void operation unless selinux is active. Reported-by: Miroslav Vadkerti Diagnosed-by: Paul Moore Signed-off-by: Eric Dumazet Cc: "David S. Miller" Cc: linux-security-module@vger.kernel.org Acked-by: James Morris Tested-by: Paul Moore Acked-by: Paul Moore Signed-off-by: David S. Miller --- include/linux/security.h | 8 ++++++++ net/ipv4/tcp_output.c | 1 + security/capability.c | 6 ++++++ security/security.c | 5 +++++ security/selinux/hooks.c | 7 +++++++ 5 files changed, 27 insertions(+) diff --git a/include/linux/security.h b/include/linux/security.h index eee7478cda70..6c3a78ace051 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1638,6 +1638,7 @@ struct security_operations { int (*tun_dev_attach_queue) (void *security); int (*tun_dev_attach) (struct sock *sk, void *security); int (*tun_dev_open) (void *security); + void (*skb_owned_by) (struct sk_buff *skb, struct sock *sk); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM @@ -2588,6 +2589,8 @@ int security_tun_dev_attach_queue(void *security); int security_tun_dev_attach(struct sock *sk, void *security); int security_tun_dev_open(void *security); +void security_skb_owned_by(struct sk_buff *skb, struct sock *sk); + #else /* CONFIG_SECURITY_NETWORK */ static inline int security_unix_stream_connect(struct sock *sock, struct sock *other, @@ -2779,6 +2782,11 @@ static inline int security_tun_dev_open(void *security) { return 0; } + +static inline void security_skb_owned_by(struct sk_buff *skb, struct sock *sk) +{ +} + #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5d0b4387cba6..b44cf81d8178 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2709,6 +2709,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, skb_reserve(skb, MAX_TCP_HEADER); skb_dst_set(skb, dst); + security_skb_owned_by(skb, sk); mss = dst_metric_advmss(dst); if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) diff --git a/security/capability.c b/security/capability.c index 579775088967..6783c3e6c88e 100644 --- a/security/capability.c +++ b/security/capability.c @@ -737,6 +737,11 @@ static int cap_tun_dev_open(void *security) { return 0; } + +static void cap_skb_owned_by(struct sk_buff *skb, struct sock *sk) +{ +} + #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM @@ -1071,6 +1076,7 @@ void __init security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, tun_dev_open); set_to_cap_if_null(ops, tun_dev_attach_queue); set_to_cap_if_null(ops, tun_dev_attach); + set_to_cap_if_null(ops, skb_owned_by); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM set_to_cap_if_null(ops, xfrm_policy_alloc_security); diff --git a/security/security.c b/security/security.c index 7b88c6aeaed4..03f248b84e9f 100644 --- a/security/security.c +++ b/security/security.c @@ -1290,6 +1290,11 @@ int security_tun_dev_open(void *security) } EXPORT_SYMBOL(security_tun_dev_open); +void security_skb_owned_by(struct sk_buff *skb, struct sock *sk) +{ + security_ops->skb_owned_by(skb, sk); +} + #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 2fa28c88900c..7171a957b933 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -51,6 +51,7 @@ #include #include #include /* for local_port_range[] */ +#include #include /* struct or_callable used in sock_rcv_skb */ #include #include @@ -4363,6 +4364,11 @@ static void selinux_inet_conn_established(struct sock *sk, struct sk_buff *skb) selinux_skb_peerlbl_sid(skb, family, &sksec->peer_sid); } +static void selinux_skb_owned_by(struct sk_buff *skb, struct sock *sk) +{ + skb_set_owner_w(skb, sk); +} + static int selinux_secmark_relabel_packet(u32 sid) { const struct task_security_struct *__tsec; @@ -5664,6 +5670,7 @@ static struct security_operations selinux_ops = { .tun_dev_attach_queue = selinux_tun_dev_attach_queue, .tun_dev_attach = selinux_tun_dev_attach, .tun_dev_open = selinux_tun_dev_open, + .skb_owned_by = selinux_skb_owned_by, #ifdef CONFIG_SECURITY_NETWORK_XFRM .xfrm_policy_alloc_security = selinux_xfrm_policy_alloc, From 386afc91144b36b42117b0092893f15bc8798a80 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 9 Apr 2013 10:48:33 -0700 Subject: [PATCH 144/303] spinlocks and preemption points need to be at least compiler barriers In UP and non-preempt respectively, the spinlocks and preemption disable/enable points are stubbed out entirely, because there is no regular code that can ever hit the kind of concurrency they are meant to protect against. However, while there is no regular code that can cause scheduling, we _do_ end up having some exceptional (literally!) code that can do so, and that we need to make sure does not ever get moved into the critical region by the compiler. In particular, get_user() and put_user() is generally implemented as inline asm statements (even if the inline asm may then make a call instruction to call out-of-line), and can obviously cause a page fault and IO as a result. If that inline asm has been scheduled into the middle of a preemption-safe (or spinlock-protected) code region, we obviously lose. Now, admittedly this is *very* unlikely to actually ever happen, and we've not seen examples of actual bugs related to this. But partly exactly because it's so hard to trigger and the resulting bug is so subtle, we should be extra careful to get this right. So make sure that even when preemption is disabled, and we don't have to generate any actual *code* to explicitly tell the system that we are in a preemption-disabled region, we need to at least tell the compiler not to move things around the critical region. This patch grew out of the same discussion that caused commits 79e5f05edcbf ("ARC: Add implicit compiler barrier to raw_local_irq* functions") and 3e2e0d2c222b ("tile: comment assumption about __insn_mtspr for ") to come about. Note for stable: use discretion when/if applying this. As mentioned, this bug may never have actually bitten anybody, and gcc may never have done the required code motion for it to possibly ever trigger in practice. Cc: stable@vger.kernel.org Cc: Steven Rostedt Cc: Peter Zijlstra Signed-off-by: Linus Torvalds --- include/linux/preempt.h | 20 +++++++++++++------- include/linux/spinlock_up.h | 29 ++++++++++++++++++----------- 2 files changed, 31 insertions(+), 18 deletions(-) diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 5a710b9c578e..87a03c746f17 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -93,14 +93,20 @@ do { \ #else /* !CONFIG_PREEMPT_COUNT */ -#define preempt_disable() do { } while (0) -#define sched_preempt_enable_no_resched() do { } while (0) -#define preempt_enable_no_resched() do { } while (0) -#define preempt_enable() do { } while (0) +/* + * Even if we don't have any preemption, we need preempt disable/enable + * to be barriers, so that we don't have things like get_user/put_user + * that can cause faults and scheduling migrate into our preempt-protected + * region. + */ +#define preempt_disable() barrier() +#define sched_preempt_enable_no_resched() barrier() +#define preempt_enable_no_resched() barrier() +#define preempt_enable() barrier() -#define preempt_disable_notrace() do { } while (0) -#define preempt_enable_no_resched_notrace() do { } while (0) -#define preempt_enable_notrace() do { } while (0) +#define preempt_disable_notrace() barrier() +#define preempt_enable_no_resched_notrace() barrier() +#define preempt_enable_notrace() barrier() #endif /* CONFIG_PREEMPT_COUNT */ diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h index a26e2fb604e6..e2369c167dbd 100644 --- a/include/linux/spinlock_up.h +++ b/include/linux/spinlock_up.h @@ -16,7 +16,10 @@ * In the debug case, 1 means unlocked, 0 means locked. (the values * are inverted, to catch initialization bugs) * - * No atomicity anywhere, we are on UP. + * No atomicity anywhere, we are on UP. However, we still need + * the compiler barriers, because we do not want the compiler to + * move potentially faulting instructions (notably user accesses) + * into the locked sequence, resulting in non-atomic execution. */ #ifdef CONFIG_DEBUG_SPINLOCK @@ -25,6 +28,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) { lock->slock = 0; + barrier(); } static inline void @@ -32,6 +36,7 @@ arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) { local_irq_save(flags); lock->slock = 0; + barrier(); } static inline int arch_spin_trylock(arch_spinlock_t *lock) @@ -39,32 +44,34 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) char oldval = lock->slock; lock->slock = 0; + barrier(); return oldval > 0; } static inline void arch_spin_unlock(arch_spinlock_t *lock) { + barrier(); lock->slock = 1; } /* * Read-write spinlocks. No debug version. */ -#define arch_read_lock(lock) do { (void)(lock); } while (0) -#define arch_write_lock(lock) do { (void)(lock); } while (0) -#define arch_read_trylock(lock) ({ (void)(lock); 1; }) -#define arch_write_trylock(lock) ({ (void)(lock); 1; }) -#define arch_read_unlock(lock) do { (void)(lock); } while (0) -#define arch_write_unlock(lock) do { (void)(lock); } while (0) +#define arch_read_lock(lock) do { barrier(); (void)(lock); } while (0) +#define arch_write_lock(lock) do { barrier(); (void)(lock); } while (0) +#define arch_read_trylock(lock) ({ barrier(); (void)(lock); 1; }) +#define arch_write_trylock(lock) ({ barrier(); (void)(lock); 1; }) +#define arch_read_unlock(lock) do { barrier(); (void)(lock); } while (0) +#define arch_write_unlock(lock) do { barrier(); (void)(lock); } while (0) #else /* DEBUG_SPINLOCK */ #define arch_spin_is_locked(lock) ((void)(lock), 0) /* for sched.c and kernel_lock.c: */ -# define arch_spin_lock(lock) do { (void)(lock); } while (0) -# define arch_spin_lock_flags(lock, flags) do { (void)(lock); } while (0) -# define arch_spin_unlock(lock) do { (void)(lock); } while (0) -# define arch_spin_trylock(lock) ({ (void)(lock); 1; }) +# define arch_spin_lock(lock) do { barrier(); (void)(lock); } while (0) +# define arch_spin_lock_flags(lock, flags) do { barrier(); (void)(lock); } while (0) +# define arch_spin_unlock(lock) do { barrier(); (void)(lock); } while (0) +# define arch_spin_trylock(lock) ({ barrier(); (void)(lock); 1; }) #endif /* DEBUG_SPINLOCK */ #define arch_spin_is_contended(lock) (((void)(lock), 0)) From 52f21999c7b921a0390708b66ed286282c2e4bee Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 28 Mar 2013 13:30:23 -0400 Subject: [PATCH 145/303] ecryptfs: close rmmod race Signed-off-by: Al Viro --- fs/ecryptfs/miscdev.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c index 412e6eda25f8..e4141f257495 100644 --- a/fs/ecryptfs/miscdev.c +++ b/fs/ecryptfs/miscdev.c @@ -80,13 +80,6 @@ ecryptfs_miscdev_open(struct inode *inode, struct file *file) int rc; mutex_lock(&ecryptfs_daemon_hash_mux); - rc = try_module_get(THIS_MODULE); - if (rc == 0) { - rc = -EIO; - printk(KERN_ERR "%s: Error attempting to increment module use " - "count; rc = [%d]\n", __func__, rc); - goto out_unlock_daemon_list; - } rc = ecryptfs_find_daemon_by_euid(&daemon); if (!rc) { rc = -EINVAL; @@ -96,7 +89,7 @@ ecryptfs_miscdev_open(struct inode *inode, struct file *file) if (rc) { printk(KERN_ERR "%s: Error attempting to spawn daemon; " "rc = [%d]\n", __func__, rc); - goto out_module_put_unlock_daemon_list; + goto out_unlock_daemon_list; } mutex_lock(&daemon->mux); if (daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN) { @@ -108,9 +101,6 @@ ecryptfs_miscdev_open(struct inode *inode, struct file *file) atomic_inc(&ecryptfs_num_miscdev_opens); out_unlock_daemon: mutex_unlock(&daemon->mux); -out_module_put_unlock_daemon_list: - if (rc) - module_put(THIS_MODULE); out_unlock_daemon_list: mutex_unlock(&ecryptfs_daemon_hash_mux); return rc; @@ -147,7 +137,6 @@ ecryptfs_miscdev_release(struct inode *inode, struct file *file) "bug.\n", __func__, rc); BUG(); } - module_put(THIS_MODULE); return rc; } @@ -471,6 +460,7 @@ out_free: static const struct file_operations ecryptfs_miscdev_fops = { + .owner = THIS_MODULE, .open = ecryptfs_miscdev_open, .poll = ecryptfs_miscdev_poll, .read = ecryptfs_miscdev_read, From 8ce584c7416d8a85a6f3edc17d1cddefe331e87e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 30 Mar 2013 20:13:46 -0400 Subject: [PATCH 146/303] procfs: add proc_remove_subtree() just what it sounds like; do that only to procfs subtrees you've created - doing that to something shared with another driver is not only antisocial, but might cause interesting races with proc_create() and its ilk. Signed-off-by: Al Viro --- fs/proc/generic.c | 119 ++++++++++++++++++++++++++++++---------- include/linux/proc_fs.h | 2 + 2 files changed, 91 insertions(+), 30 deletions(-) diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 4b3b3ffb52f1..21e1a8f1659d 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -755,37 +755,8 @@ void pde_put(struct proc_dir_entry *pde) free_proc_entry(pde); } -/* - * Remove a /proc entry and free it if it's not currently in use. - */ -void remove_proc_entry(const char *name, struct proc_dir_entry *parent) +static void entry_rundown(struct proc_dir_entry *de) { - struct proc_dir_entry **p; - struct proc_dir_entry *de = NULL; - const char *fn = name; - unsigned int len; - - spin_lock(&proc_subdir_lock); - if (__xlate_proc_name(name, &parent, &fn) != 0) { - spin_unlock(&proc_subdir_lock); - return; - } - len = strlen(fn); - - for (p = &parent->subdir; *p; p=&(*p)->next ) { - if (proc_match(len, fn, *p)) { - de = *p; - *p = de->next; - de->next = NULL; - break; - } - } - spin_unlock(&proc_subdir_lock); - if (!de) { - WARN(1, "name '%s'\n", name); - return; - } - spin_lock(&de->pde_unload_lock); /* * Stop accepting new callers into module. If you're @@ -817,6 +788,40 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) spin_lock(&de->pde_unload_lock); } spin_unlock(&de->pde_unload_lock); +} + +/* + * Remove a /proc entry and free it if it's not currently in use. + */ +void remove_proc_entry(const char *name, struct proc_dir_entry *parent) +{ + struct proc_dir_entry **p; + struct proc_dir_entry *de = NULL; + const char *fn = name; + unsigned int len; + + spin_lock(&proc_subdir_lock); + if (__xlate_proc_name(name, &parent, &fn) != 0) { + spin_unlock(&proc_subdir_lock); + return; + } + len = strlen(fn); + + for (p = &parent->subdir; *p; p=&(*p)->next ) { + if (proc_match(len, fn, *p)) { + de = *p; + *p = de->next; + de->next = NULL; + break; + } + } + spin_unlock(&proc_subdir_lock); + if (!de) { + WARN(1, "name '%s'\n", name); + return; + } + + entry_rundown(de); if (S_ISDIR(de->mode)) parent->nlink--; @@ -827,3 +832,57 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) pde_put(de); } EXPORT_SYMBOL(remove_proc_entry); + +int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) +{ + struct proc_dir_entry **p; + struct proc_dir_entry *root = NULL, *de, *next; + const char *fn = name; + unsigned int len; + + spin_lock(&proc_subdir_lock); + if (__xlate_proc_name(name, &parent, &fn) != 0) { + spin_unlock(&proc_subdir_lock); + return -ENOENT; + } + len = strlen(fn); + + for (p = &parent->subdir; *p; p=&(*p)->next ) { + if (proc_match(len, fn, *p)) { + root = *p; + *p = root->next; + root->next = NULL; + break; + } + } + if (!root) { + spin_unlock(&proc_subdir_lock); + return -ENOENT; + } + de = root; + while (1) { + next = de->subdir; + if (next) { + de->subdir = next->next; + next->next = NULL; + de = next; + continue; + } + spin_unlock(&proc_subdir_lock); + + entry_rundown(de); + next = de->parent; + if (S_ISDIR(de->mode)) + next->nlink--; + de->nlink = 0; + if (de == root) + break; + pde_put(de); + + spin_lock(&proc_subdir_lock); + de = next; + } + pde_put(root); + return 0; +} +EXPORT_SYMBOL(remove_proc_subtree); diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 8307f2f94d86..94dfb2aa5533 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -117,6 +117,7 @@ struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, const struct file_operations *proc_fops, void *data); extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent); +extern int remove_proc_subtree(const char *name, struct proc_dir_entry *parent); struct pid_namespace; @@ -202,6 +203,7 @@ static inline struct proc_dir_entry *proc_create_data(const char *name, return NULL; } #define remove_proc_entry(name, parent) do {} while (0) +#define remove_proc_subtree(name, parent) do {} while (0) static inline struct proc_dir_entry *proc_symlink(const char *name, struct proc_dir_entry *parent,const char *dest) {return NULL;} From ccf932042fa7785832d8989ba1369cd7c7f5d7a1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 31 Mar 2013 22:34:37 -0400 Subject: [PATCH 147/303] palinfo fixes * check for proc_mkdir() failures * fix buffer overrun - sizeof(format string) is *not* enough to hold sprintf() result. * use proc_remove_subtree(); life's much easier with it Signed-off-by: Al Viro --- arch/ia64/kernel/palinfo.c | 77 +++++++------------------------------- 1 file changed, 13 insertions(+), 64 deletions(-) diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c index 77597e5ea60a..79521d5499f9 100644 --- a/arch/ia64/kernel/palinfo.c +++ b/arch/ia64/kernel/palinfo.c @@ -849,17 +849,6 @@ static palinfo_entry_t palinfo_entries[]={ #define NR_PALINFO_ENTRIES (int) ARRAY_SIZE(palinfo_entries) -/* - * this array is used to keep track of the proc entries we create. This is - * required in the module mode when we need to remove all entries. The procfs code - * does not do recursion of deletion - * - * Notes: - * - +1 accounts for the cpuN directory entry in /proc/pal - */ -#define NR_PALINFO_PROC_ENTRIES (NR_CPUS*(NR_PALINFO_ENTRIES+1)) - -static struct proc_dir_entry *palinfo_proc_entries[NR_PALINFO_PROC_ENTRIES]; static struct proc_dir_entry *palinfo_dir; /* @@ -971,60 +960,32 @@ palinfo_read_entry(char *page, char **start, off_t off, int count, int *eof, voi static void __cpuinit create_palinfo_proc_entries(unsigned int cpu) { -# define CPUSTR "cpu%d" - pal_func_cpu_u_t f; - struct proc_dir_entry **pdir; struct proc_dir_entry *cpu_dir; int j; - char cpustr[sizeof(CPUSTR)]; - - - /* - * we keep track of created entries in a depth-first order for - * cleanup purposes. Each entry is stored into palinfo_proc_entries - */ - sprintf(cpustr,CPUSTR, cpu); + char cpustr[3+4+1]; /* cpu numbers are up to 4095 on itanic */ + sprintf(cpustr, "cpu%d", cpu); cpu_dir = proc_mkdir(cpustr, palinfo_dir); + if (!cpu_dir) + return; f.req_cpu = cpu; - /* - * Compute the location to store per cpu entries - * We dont store the top level entry in this list, but - * remove it finally after removing all cpu entries. - */ - pdir = &palinfo_proc_entries[cpu*(NR_PALINFO_ENTRIES+1)]; - *pdir++ = cpu_dir; for (j=0; j < NR_PALINFO_ENTRIES; j++) { f.func_id = j; - *pdir = create_proc_read_entry( - palinfo_entries[j].name, 0, cpu_dir, - palinfo_read_entry, (void *)f.value); - pdir++; + create_proc_read_entry( + palinfo_entries[j].name, 0, cpu_dir, + palinfo_read_entry, (void *)f.value); } } static void remove_palinfo_proc_entries(unsigned int hcpu) { - int j; - struct proc_dir_entry *cpu_dir, **pdir; - - pdir = &palinfo_proc_entries[hcpu*(NR_PALINFO_ENTRIES+1)]; - cpu_dir = *pdir; - *pdir++=NULL; - for (j=0; j < (NR_PALINFO_ENTRIES); j++) { - if ((*pdir)) { - remove_proc_entry ((*pdir)->name, cpu_dir); - *pdir ++= NULL; - } - } - - if (cpu_dir) { - remove_proc_entry(cpu_dir->name, palinfo_dir); - } + char cpustr[3+4+1]; /* cpu numbers are up to 4095 on itanic */ + sprintf(cpustr, "cpu%d", hcpu); + remove_proc_subtree(cpustr, palinfo_dir); } static int __cpuinit palinfo_cpu_callback(struct notifier_block *nfb, @@ -1058,6 +1019,8 @@ palinfo_init(void) printk(KERN_INFO "PAL Information Facility v%s\n", PALINFO_VERSION); palinfo_dir = proc_mkdir("pal", NULL); + if (!palinfo_dir) + return -ENOMEM; /* Create palinfo dirs in /proc for all online cpus */ for_each_online_cpu(i) { @@ -1073,22 +1036,8 @@ palinfo_init(void) static void __exit palinfo_exit(void) { - int i = 0; - - /* remove all nodes: depth first pass. Could optimize this */ - for_each_online_cpu(i) { - remove_palinfo_proc_entries(i); - } - - /* - * Remove the top level entry finally - */ - remove_proc_entry(palinfo_dir->name, NULL); - - /* - * Unregister from cpu notifier callbacks - */ unregister_hotcpu_notifier(&palinfo_cpu_notifier); + remove_proc_subtree("pal", NULL); } module_init(palinfo_init); From e9c5d8a562f01b211926d70443378eb14b29a676 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Tue, 9 Apr 2013 17:33:29 +0400 Subject: [PATCH 148/303] mnt: release locks on error path in do_loopback do_loopback calls lock_mount(path) and forget to unlock_mount if clone_mnt or copy_mnt fails. [ 77.661566] ================================================ [ 77.662939] [ BUG: lock held when returning to user space! ] [ 77.664104] 3.9.0-rc5+ #17 Not tainted [ 77.664982] ------------------------------------------------ [ 77.666488] mount/514 is leaving the kernel with locks still held! [ 77.668027] 2 locks held by mount/514: [ 77.668817] #0: (&sb->s_type->i_mutex_key#7){+.+.+.}, at: [] lock_mount+0x32/0xe0 [ 77.671755] #1: (&namespace_sem){+++++.}, at: [] lock_mount+0x4a/0xe0 Signed-off-by: Andrey Vagin Signed-off-by: Al Viro --- fs/namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index 50ca17d3cb45..6c7d31eebba4 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1686,7 +1686,7 @@ static int do_loopback(struct path *path, const char *old_name, if (IS_ERR(mnt)) { err = PTR_ERR(mnt); - goto out; + goto out2; } err = graft_tree(mnt, path); From 02f815cb6d3f57914228be84df9613ee5a01c2e6 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 9 Apr 2013 08:57:19 +0000 Subject: [PATCH 149/303] netfilter: ipset: list:set: fix reference counter update The last element can be replaced or pushed off and in both cases the reference counter must be updated. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_list_set.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 8371c2bac2e4..09c744aa8982 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -174,9 +174,13 @@ list_set_add(struct list_set *map, u32 i, ip_set_id_t id, { const struct set_elem *e = list_set_elem(map, i); - if (i == map->size - 1 && e->id != IPSET_INVALID_ID) - /* Last element replaced: e.g. add new,before,last */ - ip_set_put_byindex(e->id); + if (e->id != IPSET_INVALID_ID) { + const struct set_elem *x = list_set_elem(map, map->size - 1); + + /* Last element replaced or pushed off */ + if (x->id != IPSET_INVALID_ID) + ip_set_put_byindex(x->id); + } if (with_timeout(map->timeout)) list_elem_tadd(map, i, id, ip_set_timeout_set(timeout)); else From 6eb4c7e96e19fd2c38a103472048fc0e0e0a3ec3 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 9 Apr 2013 08:57:20 +0000 Subject: [PATCH 150/303] netfilter: ipset: hash:*net*: nomatch flag not excluded on set resize If a resize is triggered the nomatch flag is not excluded at hashing, which leads to the element missed at lookup in the resized set. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set_ahash.h | 30 +++++++++++++++----- net/netfilter/ipset/ip_set_hash_ipportnet.c | 18 ++++++++++++ net/netfilter/ipset/ip_set_hash_net.c | 22 ++++++++++++-- net/netfilter/ipset/ip_set_hash_netiface.c | 22 ++++++++++++-- net/netfilter/ipset/ip_set_hash_netport.c | 18 ++++++++++++ 5 files changed, 99 insertions(+), 11 deletions(-) diff --git a/include/linux/netfilter/ipset/ip_set_ahash.h b/include/linux/netfilter/ipset/ip_set_ahash.h index 01d25e6fc792..0214c4c146fa 100644 --- a/include/linux/netfilter/ipset/ip_set_ahash.h +++ b/include/linux/netfilter/ipset/ip_set_ahash.h @@ -291,6 +291,7 @@ ip_set_hash_destroy(struct ip_set *set) #define type_pf_data_tlist TOKEN(TYPE, PF, _data_tlist) #define type_pf_data_next TOKEN(TYPE, PF, _data_next) #define type_pf_data_flags TOKEN(TYPE, PF, _data_flags) +#define type_pf_data_reset_flags TOKEN(TYPE, PF, _data_reset_flags) #ifdef IP_SET_HASH_WITH_NETS #define type_pf_data_match TOKEN(TYPE, PF, _data_match) #else @@ -385,9 +386,9 @@ type_pf_resize(struct ip_set *set, bool retried) struct ip_set_hash *h = set->data; struct htable *t, *orig = h->table; u8 htable_bits = orig->htable_bits; - const struct type_pf_elem *data; + struct type_pf_elem *data; struct hbucket *n, *m; - u32 i, j; + u32 i, j, flags = 0; int ret; retry: @@ -412,9 +413,16 @@ retry: n = hbucket(orig, i); for (j = 0; j < n->pos; j++) { data = ahash_data(n, j); +#ifdef IP_SET_HASH_WITH_NETS + flags = 0; + type_pf_data_reset_flags(data, &flags); +#endif m = hbucket(t, HKEY(data, h->initval, htable_bits)); - ret = type_pf_elem_add(m, data, AHASH_MAX(h), 0); + ret = type_pf_elem_add(m, data, AHASH_MAX(h), flags); if (ret < 0) { +#ifdef IP_SET_HASH_WITH_NETS + type_pf_data_flags(data, flags); +#endif read_unlock_bh(&set->lock); ahash_destroy(t); if (ret == -EAGAIN) @@ -836,9 +844,9 @@ type_pf_tresize(struct ip_set *set, bool retried) struct ip_set_hash *h = set->data; struct htable *t, *orig = h->table; u8 htable_bits = orig->htable_bits; - const struct type_pf_elem *data; + struct type_pf_elem *data; struct hbucket *n, *m; - u32 i, j; + u32 i, j, flags = 0; int ret; /* Try to cleanup once */ @@ -873,10 +881,17 @@ retry: n = hbucket(orig, i); for (j = 0; j < n->pos; j++) { data = ahash_tdata(n, j); +#ifdef IP_SET_HASH_WITH_NETS + flags = 0; + type_pf_data_reset_flags(data, &flags); +#endif m = hbucket(t, HKEY(data, h->initval, htable_bits)); - ret = type_pf_elem_tadd(m, data, AHASH_MAX(h), 0, - ip_set_timeout_get(type_pf_data_timeout(data))); + ret = type_pf_elem_tadd(m, data, AHASH_MAX(h), flags, + ip_set_timeout_get(type_pf_data_timeout(data))); if (ret < 0) { +#ifdef IP_SET_HASH_WITH_NETS + type_pf_data_flags(data, flags); +#endif read_unlock_bh(&set->lock); ahash_destroy(t); if (ret == -EAGAIN) @@ -1187,6 +1202,7 @@ type_pf_gc_init(struct ip_set *set) #undef type_pf_data_tlist #undef type_pf_data_next #undef type_pf_data_flags +#undef type_pf_data_reset_flags #undef type_pf_data_match #undef type_pf_elem diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index f2627226a087..10a30b4fc7db 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -104,6 +104,15 @@ hash_ipportnet4_data_flags(struct hash_ipportnet4_elem *dst, u32 flags) dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); } +static inline void +hash_ipportnet4_data_reset_flags(struct hash_ipportnet4_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } +} + static inline int hash_ipportnet4_data_match(const struct hash_ipportnet4_elem *elem) { @@ -414,6 +423,15 @@ hash_ipportnet6_data_flags(struct hash_ipportnet6_elem *dst, u32 flags) dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); } +static inline void +hash_ipportnet6_data_reset_flags(struct hash_ipportnet6_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } +} + static inline int hash_ipportnet6_data_match(const struct hash_ipportnet6_elem *elem) { diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 4b677cf6bf7d..d6a59154d710 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -87,7 +87,16 @@ hash_net4_data_copy(struct hash_net4_elem *dst, static inline void hash_net4_data_flags(struct hash_net4_elem *dst, u32 flags) { - dst->nomatch = flags & IPSET_FLAG_NOMATCH; + dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); +} + +static inline void +hash_net4_data_reset_flags(struct hash_net4_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } } static inline int @@ -308,7 +317,16 @@ hash_net6_data_copy(struct hash_net6_elem *dst, static inline void hash_net6_data_flags(struct hash_net6_elem *dst, u32 flags) { - dst->nomatch = flags & IPSET_FLAG_NOMATCH; + dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); +} + +static inline void +hash_net6_data_reset_flags(struct hash_net6_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } } static inline int diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 6ba985f1c96f..f2b0a3c30130 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -198,7 +198,16 @@ hash_netiface4_data_copy(struct hash_netiface4_elem *dst, static inline void hash_netiface4_data_flags(struct hash_netiface4_elem *dst, u32 flags) { - dst->nomatch = flags & IPSET_FLAG_NOMATCH; + dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); +} + +static inline void +hash_netiface4_data_reset_flags(struct hash_netiface4_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } } static inline int @@ -494,7 +503,7 @@ hash_netiface6_data_copy(struct hash_netiface6_elem *dst, static inline void hash_netiface6_data_flags(struct hash_netiface6_elem *dst, u32 flags) { - dst->nomatch = flags & IPSET_FLAG_NOMATCH; + dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); } static inline int @@ -503,6 +512,15 @@ hash_netiface6_data_match(const struct hash_netiface6_elem *elem) return elem->nomatch ? -ENOTEMPTY : 1; } +static inline void +hash_netiface6_data_reset_flags(struct hash_netiface6_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } +} + static inline void hash_netiface6_data_zero_out(struct hash_netiface6_elem *elem) { diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index af20c0c5ced2..349deb672a2d 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -104,6 +104,15 @@ hash_netport4_data_flags(struct hash_netport4_elem *dst, u32 flags) dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); } +static inline void +hash_netport4_data_reset_flags(struct hash_netport4_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } +} + static inline int hash_netport4_data_match(const struct hash_netport4_elem *elem) { @@ -375,6 +384,15 @@ hash_netport6_data_flags(struct hash_netport6_elem *dst, u32 flags) dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); } +static inline void +hash_netport6_data_reset_flags(struct hash_netport6_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } +} + static inline int hash_netport6_data_match(const struct hash_netport6_elem *elem) { From f934af05cb1bf20558542185299394a69060b829 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Tue, 9 Apr 2013 15:35:46 -0400 Subject: [PATCH 151/303] add memory barrier to arch_local_irq_restore arch_local_irq_save() and friends are required to act as compiler memory barriers. This patch adds a "memory" clobber to the inline asm code in arch_local_irq_restore() which is used as the building block for other functions needing to set/clear the interrupt enable in the CSR register. Signed-off-by: Mark Salter --- arch/c6x/include/asm/irqflags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/c6x/include/asm/irqflags.h b/arch/c6x/include/asm/irqflags.h index cf78e09e18c3..2c71d5634ec2 100644 --- a/arch/c6x/include/asm/irqflags.h +++ b/arch/c6x/include/asm/irqflags.h @@ -27,7 +27,7 @@ static inline unsigned long arch_local_save_flags(void) /* set interrupt enabled status */ static inline void arch_local_irq_restore(unsigned long flags) { - asm volatile (" mvc .s2 %0,CSR\n" : : "b"(flags)); + asm volatile (" mvc .s2 %0,CSR\n" : : "b"(flags) : "memory"); } /* unconditionally enable interrupts */ From f110c0c1926028a233830c6166e4d40314420823 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Tue, 9 Apr 2013 16:18:55 +1000 Subject: [PATCH 152/303] powerpc: fix compiling CONFIG_PPC_TRANSACTIONAL_MEM when CONFIG_ALTIVEC=n We can't compile a kernel with CONFIG_ALTIVEC=n when CONFIG_PPC_TRANSACTIONAL_MEM=y. We currently get: arch/powerpc/kernel/tm.S:320: Error: unsupported relocation against THREAD_VSCR arch/powerpc/kernel/tm.S:323: Error: unsupported relocation against THREAD_VR0 arch/powerpc/kernel/tm.S:323: Error: unsupported relocation against THREAD_VR0 etc. The below fixes this with a sprinkling of #ifdefs. This was found by mpe with kisskb: http://kisskb.ellerman.id.au/kisskb/buildresult/8539442/ Signed-off-by: Michael Neuling Signed-off-by: Stephen Rothwell --- arch/powerpc/kernel/process.c | 2 ++ arch/powerpc/kernel/signal_32.c | 2 ++ arch/powerpc/kernel/signal_64.c | 2 ++ arch/powerpc/kernel/tm.S | 2 ++ 4 files changed, 8 insertions(+) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 59dd545fdde1..16e77a81ab4f 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -555,10 +555,12 @@ static inline void tm_recheckpoint_new_task(struct task_struct *new) new->thread.regs->msr |= (MSR_FP | new->thread.fpexc_mode); } +#ifdef CONFIG_ALTIVEC if (msr & MSR_VEC) { do_load_up_transact_altivec(&new->thread); new->thread.regs->msr |= MSR_VEC; } +#endif /* We may as well turn on VSX too since all the state is restored now */ if (msr & MSR_VSX) new->thread.regs->msr |= MSR_VSX; diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 3acb28e245b4..95068bf569ad 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -866,10 +866,12 @@ static long restore_tm_user_regs(struct pt_regs *regs, do_load_up_transact_fpu(¤t->thread); regs->msr |= (MSR_FP | current->thread.fpexc_mode); } +#ifdef CONFIG_ALTIVEC if (msr & MSR_VEC) { do_load_up_transact_altivec(¤t->thread); regs->msr |= MSR_VEC; } +#endif return 0; } diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 995f8543cb57..c1794286098c 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -522,10 +522,12 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, do_load_up_transact_fpu(¤t->thread); regs->msr |= (MSR_FP | current->thread.fpexc_mode); } +#ifdef CONFIG_ALTIVEC if (msr & MSR_VEC) { do_load_up_transact_altivec(¤t->thread); regs->msr |= MSR_VEC; } +#endif return err; } diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 84dbace657ce..2da67e7a16d5 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -309,6 +309,7 @@ _GLOBAL(tm_recheckpoint) or r5, r6, r5 /* Set MSR.FP+.VSX/.VEC */ mtmsr r5 +#ifdef CONFIG_ALTIVEC /* FP and VEC registers: These are recheckpointed from thread.fpr[] * and thread.vr[] respectively. The thread.transact_fpr[] version * is more modern, and will be loaded subsequently by any FPUnavailable @@ -323,6 +324,7 @@ _GLOBAL(tm_recheckpoint) REST_32VRS(0, r5, r3) /* r5 scratch, r3 THREAD ptr */ ld r5, THREAD_VRSAVE(r3) mtspr SPRN_VRSAVE, r5 +#endif dont_restore_vec: andi. r0, r4, MSR_FP From 83e03b3fe4daffdebbb42151d5410d730ae50bd1 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 1 Apr 2013 21:46:23 +0900 Subject: [PATCH 153/303] tracing: Fix double free when function profile init failed On the failure path, stat->start and stat->pages will refer same page. So it'll attempt to free the same page again and get kernel panic. Link: http://lkml.kernel.org/r/1364820385-32027-1-git-send-email-namhyung@kernel.org Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: stable@vger.kernel.org Signed-off-by: Namhyung Kim Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 7e897106b7e0..926ebfb74936 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -694,7 +694,6 @@ int ftrace_profile_pages_init(struct ftrace_profile_stat *stat) free_page(tmp); } - free_page((unsigned long)stat->pages); stat->pages = NULL; stat->start = NULL; From 1812a3db0874be1d1524086da9e84397b800f546 Mon Sep 17 00:00:00 2001 From: Christopher Harvey Date: Fri, 5 Apr 2013 10:51:15 -0400 Subject: [PATCH 154/303] drm/mgag200: Index 24 in extended CRTC registers is 24 in hex, not decimal. This change properly enables the "requester" in G200ER cards that is responsible for getting pixels out of memory and clocking them out to the screen. Signed-off-by: Christopher Harvey Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/mgag200/mgag200_mode.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c index fe22bb780e1d..78d8e919509f 100644 --- a/drivers/gpu/drm/mgag200/mgag200_mode.c +++ b/drivers/gpu/drm/mgag200/mgag200_mode.c @@ -751,8 +751,6 @@ static int mga_crtc_mode_set(struct drm_crtc *crtc, int i; unsigned char misc = 0; unsigned char ext_vga[6]; - unsigned char ext_vga_index24; - unsigned char dac_index90 = 0; u8 bppshift; static unsigned char dacvalue[] = { @@ -803,7 +801,6 @@ static int mga_crtc_mode_set(struct drm_crtc *crtc, option2 = 0x0000b000; break; case G200_ER: - dac_index90 = 0; break; } @@ -852,10 +849,8 @@ static int mga_crtc_mode_set(struct drm_crtc *crtc, WREG_DAC(i, dacvalue[i]); } - if (mdev->type == G200_ER) { - WREG_DAC(0x90, dac_index90); - } - + if (mdev->type == G200_ER) + WREG_DAC(0x90, 0); if (option) pci_write_config_dword(dev->pdev, PCI_MGA_OPTION, option); @@ -952,8 +947,6 @@ static int mga_crtc_mode_set(struct drm_crtc *crtc, if (mdev->type == G200_WB) ext_vga[1] |= 0x88; - ext_vga_index24 = 0x05; - /* Set pixel clocks */ misc = 0x2d; WREG8(MGA_MISC_OUT, misc); @@ -965,7 +958,7 @@ static int mga_crtc_mode_set(struct drm_crtc *crtc, } if (mdev->type == G200_ER) - WREG_ECRT(24, ext_vga_index24); + WREG_ECRT(0x24, 0x5); if (mdev->type == G200_EV) { WREG_ECRT(6, 0); From 765024697807ad1e1cac332aa891253ca4a339da Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 9 Apr 2013 16:33:06 +0200 Subject: [PATCH 155/303] dmaengine: omap-dma: Start DMA without delay for cyclic channels cyclic DMA is only used by audio which needs DMA to be started without a delay. If the DMA for audio is started using the tasklet we experience random channel switch (to be more precise: channel shift). Reported-by: Peter Meerwald CC: stable@vger.kernel.org # v3.7+ Signed-off-by: Peter Ujfalusi Acked-by: Santosh Shilimkar Acked-by: Russell King Signed-off-by: Vinod Koul --- drivers/dma/omap-dma.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/dma/omap-dma.c b/drivers/dma/omap-dma.c index c4b4fd2acc42..08b43bf37158 100644 --- a/drivers/dma/omap-dma.c +++ b/drivers/dma/omap-dma.c @@ -276,12 +276,20 @@ static void omap_dma_issue_pending(struct dma_chan *chan) spin_lock_irqsave(&c->vc.lock, flags); if (vchan_issue_pending(&c->vc) && !c->desc) { - struct omap_dmadev *d = to_omap_dma_dev(chan->device); - spin_lock(&d->lock); - if (list_empty(&c->node)) - list_add_tail(&c->node, &d->pending); - spin_unlock(&d->lock); - tasklet_schedule(&d->task); + /* + * c->cyclic is used only by audio and in this case the DMA need + * to be started without delay. + */ + if (!c->cyclic) { + struct omap_dmadev *d = to_omap_dma_dev(chan->device); + spin_lock(&d->lock); + if (list_empty(&c->node)) + list_add_tail(&c->node, &d->pending); + spin_unlock(&d->lock); + tasklet_schedule(&d->task); + } else { + omap_dma_start_desc(c); + } } spin_unlock_irqrestore(&c->vc.lock, flags); } From a54292f58fc16e3d09aecea16de3e58a898dde97 Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Tue, 9 Apr 2013 19:57:00 +0200 Subject: [PATCH 156/303] kbuild: generate generic headers before recursing into scripts The headers are now needed inside scripts/mod since 6543bec ("mod/file2alias: make modalias generation safe for cross compiling"). Reported-and-tested-by: Lad, Prabhakar Reported-and-tested-by: "Jon Medhurst (Tixy)" Signed-off-by: Andreas Schwab Signed-off-by: Michal Marek --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5bd9f7700eb9..d99f8d898cbd 100644 --- a/Makefile +++ b/Makefile @@ -513,7 +513,8 @@ ifeq ($(KBUILD_EXTMOD),) # Carefully list dependencies so we do not try to build scripts twice # in parallel PHONY += scripts -scripts: scripts_basic include/config/auto.conf include/config/tristate.conf +scripts: scripts_basic include/config/auto.conf include/config/tristate.conf \ + asm-generic $(Q)$(MAKE) $(build)=$(@) # Objects we will link into vmlinux / subdirs we need to visit From 46fc4c909339f5a84d1679045297d9d2fb596987 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Tue, 2 Apr 2013 15:57:26 +0200 Subject: [PATCH 157/303] ssb: implement spurious tone avoidance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit And make use of it in b43. This fixes a regression introduced with 49d55cef5b1925a5c1efb6aaddaa40fc7c693335 b43: N-PHY: implement spurious tone avoidance This commit made BCM4322 use only MCS 0 on channel 13, which of course resulted in performance drop (down to 0.7Mb/s). Reported-by: Stefan Brüns Signed-off-by: Rafał Miłecki Cc: Stable Signed-off-by: John W. Linville --- drivers/net/wireless/b43/phy_n.c | 3 ++- drivers/ssb/driver_chipcommon_pmu.c | 29 +++++++++++++++++++++++ include/linux/ssb/ssb_driver_chipcommon.h | 2 ++ 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/b43/phy_n.c b/drivers/net/wireless/b43/phy_n.c index e8486c1e091a..b70f220bc4b3 100644 --- a/drivers/net/wireless/b43/phy_n.c +++ b/drivers/net/wireless/b43/phy_n.c @@ -5165,7 +5165,8 @@ static void b43_nphy_pmu_spur_avoid(struct b43_wldev *dev, bool avoid) #endif #ifdef CONFIG_B43_SSB case B43_BUS_SSB: - /* FIXME */ + ssb_pmu_spuravoid_pllupdate(&dev->dev->sdev->bus->chipco, + avoid); break; #endif } diff --git a/drivers/ssb/driver_chipcommon_pmu.c b/drivers/ssb/driver_chipcommon_pmu.c index 4c0f6d883dd3..7b0bce936762 100644 --- a/drivers/ssb/driver_chipcommon_pmu.c +++ b/drivers/ssb/driver_chipcommon_pmu.c @@ -675,3 +675,32 @@ u32 ssb_pmu_get_controlclock(struct ssb_chipcommon *cc) return 0; } } + +void ssb_pmu_spuravoid_pllupdate(struct ssb_chipcommon *cc, int spuravoid) +{ + u32 pmu_ctl = 0; + + switch (cc->dev->bus->chip_id) { + case 0x4322: + ssb_chipco_pll_write(cc, SSB_PMU1_PLLCTL0, 0x11100070); + ssb_chipco_pll_write(cc, SSB_PMU1_PLLCTL1, 0x1014140a); + ssb_chipco_pll_write(cc, SSB_PMU1_PLLCTL5, 0x88888854); + if (spuravoid == 1) + ssb_chipco_pll_write(cc, SSB_PMU1_PLLCTL2, 0x05201828); + else + ssb_chipco_pll_write(cc, SSB_PMU1_PLLCTL2, 0x05001828); + pmu_ctl = SSB_CHIPCO_PMU_CTL_PLL_UPD; + break; + case 43222: + /* TODO: BCM43222 requires updating PLLs too */ + return; + default: + ssb_printk(KERN_ERR PFX + "Unknown spuravoidance settings for chip 0x%04X, not changing PLL\n", + cc->dev->bus->chip_id); + return; + } + + chipco_set32(cc, SSB_CHIPCO_PMU_CTL, pmu_ctl); +} +EXPORT_SYMBOL_GPL(ssb_pmu_spuravoid_pllupdate); diff --git a/include/linux/ssb/ssb_driver_chipcommon.h b/include/linux/ssb/ssb_driver_chipcommon.h index 9e492be5244b..6fcfe99bd999 100644 --- a/include/linux/ssb/ssb_driver_chipcommon.h +++ b/include/linux/ssb/ssb_driver_chipcommon.h @@ -219,6 +219,7 @@ #define SSB_CHIPCO_PMU_CTL 0x0600 /* PMU control */ #define SSB_CHIPCO_PMU_CTL_ILP_DIV 0xFFFF0000 /* ILP div mask */ #define SSB_CHIPCO_PMU_CTL_ILP_DIV_SHIFT 16 +#define SSB_CHIPCO_PMU_CTL_PLL_UPD 0x00000400 #define SSB_CHIPCO_PMU_CTL_NOILPONW 0x00000200 /* No ILP on wait */ #define SSB_CHIPCO_PMU_CTL_HTREQEN 0x00000100 /* HT req enable */ #define SSB_CHIPCO_PMU_CTL_ALPREQEN 0x00000080 /* ALP req enable */ @@ -667,5 +668,6 @@ enum ssb_pmu_ldo_volt_id { void ssb_pmu_set_ldo_voltage(struct ssb_chipcommon *cc, enum ssb_pmu_ldo_volt_id id, u32 voltage); void ssb_pmu_set_ldo_paref(struct ssb_chipcommon *cc, bool on); +void ssb_pmu_spuravoid_pllupdate(struct ssb_chipcommon *cc, int spuravoid); #endif /* LINUX_SSB_CHIPCO_H_ */ From aaaf165b247a1a8ea5cd2936d9fd1eefe5e580f9 Mon Sep 17 00:00:00 2001 From: Nigel Roberts Date: Sun, 31 Mar 2013 15:13:24 +1100 Subject: [PATCH 158/303] Fix GE0/GE1 init on ix2-200 as GE0 has no PHY Signed-off-by: Jason Cooper --- arch/arm/mach-kirkwood/board-iomega_ix2_200.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-kirkwood/board-iomega_ix2_200.c b/arch/arm/mach-kirkwood/board-iomega_ix2_200.c index f655b2637b0e..e5f70415905a 100644 --- a/arch/arm/mach-kirkwood/board-iomega_ix2_200.c +++ b/arch/arm/mach-kirkwood/board-iomega_ix2_200.c @@ -20,10 +20,15 @@ static struct mv643xx_eth_platform_data iomega_ix2_200_ge00_data = { .duplex = DUPLEX_FULL, }; +static struct mv643xx_eth_platform_data iomega_ix2_200_ge01_data = { + .phy_addr = MV643XX_ETH_PHY_ADDR(11), +}; + void __init iomega_ix2_200_init(void) { /* * Basic setup. Needs to be called early. */ - kirkwood_ge01_init(&iomega_ix2_200_ge00_data); + kirkwood_ge00_init(&iomega_ix2_200_ge00_data); + kirkwood_ge01_init(&iomega_ix2_200_ge01_data); } From eb04e0ac198cec3bab407ad220438dfa65c19c67 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 10 Apr 2013 12:44:18 -0400 Subject: [PATCH 159/303] NFSv4: Doh! Typo in the fix to nfs41_walk_client_list Make sure that we set the status to 0 on success. Missed in testing because it never appears when doing multiple mounts to _different_ servers. Signed-off-by: Trond Myklebust Cc: # 3.7.x: 7b1f1fd: NFSv4/4.1: Fix bugs in nfs4[01]_walk_client_list --- fs/nfs/nfs4client.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index c7b346f8cc44..66b6664dcd4c 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -482,6 +482,7 @@ int nfs41_walk_client_list(struct nfs_client *new, atomic_inc(&pos->cl_count); *result = pos; + status = 0; dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", __func__, pos, atomic_read(&pos->cl_count)); break; From 600468d0686096ddc1000c8a2e69475931084414 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Fri, 5 Apr 2013 14:32:52 +0200 Subject: [PATCH 160/303] arm: mvebu: Fix the irq map function in SMP mode This patch fix the regression introduced by the commit 3202bf0157ccb "arm: mvebu: Improve the SMP support of the interrupt controller": GPIO IRQ were no longer delivered to the CPUs. To be delivered to a CPU an interrupt must be enabled at CPU level and at interrupt source level. Before the offending patch, all the interrupts were enabled at source level during map() function. Mask() and unmask() was done by handling the per-CPU part. It was fine when running in UP with only one CPU. The offending patch added support for SMP, in this case mask() and unmask() was done by handling the interrupt source level part. The per-CPU level part was handled by the affinity API to select the CPU which will receive the interrupt. (Due to some hardware limitation only one CPU at a time can received a given interrupt). For "normal" interrupt __setup_irq() was called when an irq was registered. irq_set_affinity() is called from this function, which enabled the interrupt on one of the CPUs. Whereas for GPIO IRQ which were chained interrupts, the irq_set_affinity() was never called and none of the CPUs was selected to receive the interrupt. With this patch all the interrupt are enable on the current CPU during map() function. Enabling the interrupts on a CPU doesn't depend anymore on irq_set_affinity() and then the chained irq are not anymore a special case. However the CPU which will receive the irq can still be modify later using irq_set_affinity(). Tested with Mirabox (A370) and Openblocks AX3 (AXP), rootfs mounted over NFS, compiled with CONFIG_SMP=y/N. Signed-off-by: Gregory CLEMENT Reported-by: Ryan Press Investigated-by: Ezequiel Garcia Tested-by: Ezequiel Garcia Tested-by: Ryan Press Signed-off-by: Jason Cooper --- arch/arm/mach-mvebu/irq-armada-370-xp.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/arch/arm/mach-mvebu/irq-armada-370-xp.c b/arch/arm/mach-mvebu/irq-armada-370-xp.c index 6a9195e10579..d5970f5a1e8d 100644 --- a/arch/arm/mach-mvebu/irq-armada-370-xp.c +++ b/arch/arm/mach-mvebu/irq-armada-370-xp.c @@ -61,7 +61,6 @@ static struct irq_domain *armada_370_xp_mpic_domain; */ static void armada_370_xp_irq_mask(struct irq_data *d) { -#ifdef CONFIG_SMP irq_hw_number_t hwirq = irqd_to_hwirq(d); if (hwirq != ARMADA_370_XP_TIMER0_PER_CPU_IRQ) @@ -70,15 +69,10 @@ static void armada_370_xp_irq_mask(struct irq_data *d) else writel(hwirq, per_cpu_int_base + ARMADA_370_XP_INT_SET_MASK_OFFS); -#else - writel(irqd_to_hwirq(d), - per_cpu_int_base + ARMADA_370_XP_INT_SET_MASK_OFFS); -#endif } static void armada_370_xp_irq_unmask(struct irq_data *d) { -#ifdef CONFIG_SMP irq_hw_number_t hwirq = irqd_to_hwirq(d); if (hwirq != ARMADA_370_XP_TIMER0_PER_CPU_IRQ) @@ -87,10 +81,6 @@ static void armada_370_xp_irq_unmask(struct irq_data *d) else writel(hwirq, per_cpu_int_base + ARMADA_370_XP_INT_CLEAR_MASK_OFFS); -#else - writel(irqd_to_hwirq(d), - per_cpu_int_base + ARMADA_370_XP_INT_CLEAR_MASK_OFFS); -#endif } #ifdef CONFIG_SMP @@ -146,7 +136,11 @@ static int armada_370_xp_mpic_irq_map(struct irq_domain *h, unsigned int virq, irq_hw_number_t hw) { armada_370_xp_irq_mask(irq_get_irq_data(virq)); - writel(hw, main_int_base + ARMADA_370_XP_INT_SET_ENABLE_OFFS); + if (hw != ARMADA_370_XP_TIMER0_PER_CPU_IRQ) + writel(hw, per_cpu_int_base + + ARMADA_370_XP_INT_CLEAR_MASK_OFFS); + else + writel(hw, main_int_base + ARMADA_370_XP_INT_SET_ENABLE_OFFS); irq_set_status_flags(virq, IRQ_LEVEL); if (hw == ARMADA_370_XP_TIMER0_PER_CPU_IRQ) { From 1160c2779b826c6f5c08e5cc542de58fd1f667d5 Mon Sep 17 00:00:00 2001 From: Samu Kallio Date: Sat, 23 Mar 2013 09:36:35 -0400 Subject: [PATCH 161/303] x86, mm, paravirt: Fix vmalloc_fault oops during lazy MMU updates In paravirtualized x86_64 kernels, vmalloc_fault may cause an oops when lazy MMU updates are enabled, because set_pgd effects are being deferred. One instance of this problem is during process mm cleanup with memory cgroups enabled. The chain of events is as follows: - zap_pte_range enables lazy MMU updates - zap_pte_range eventually calls mem_cgroup_charge_statistics, which accesses the vmalloc'd mem_cgroup per-cpu stat area - vmalloc_fault is triggered which tries to sync the corresponding PGD entry with set_pgd, but the update is deferred - vmalloc_fault oopses due to a mismatch in the PUD entries The OOPs usually looks as so: ------------[ cut here ]------------ kernel BUG at arch/x86/mm/fault.c:396! invalid opcode: 0000 [#1] SMP .. snip .. CPU 1 Pid: 10866, comm: httpd Not tainted 3.6.10-4.fc18.x86_64 #1 RIP: e030:[] [] vmalloc_fault+0x11f/0x208 .. snip .. Call Trace: [] do_page_fault+0x399/0x4b0 [] ? xen_mc_extend_args+0xec/0x110 [] page_fault+0x25/0x30 [] ? mem_cgroup_charge_statistics.isra.13+0x13/0x50 [] __mem_cgroup_uncharge_common+0xd8/0x350 [] mem_cgroup_uncharge_page+0x57/0x60 [] page_remove_rmap+0xe0/0x150 [] ? vm_normal_page+0x1a/0x80 [] unmap_single_vma+0x531/0x870 [] unmap_vmas+0x52/0xa0 [] ? pte_mfn_to_pfn+0x72/0x100 [] exit_mmap+0x98/0x170 [] ? __raw_callee_save_xen_pmd_val+0x11/0x1e [] mmput+0x83/0xf0 [] exit_mm+0x104/0x130 [] do_exit+0x15a/0x8c0 [] do_group_exit+0x3f/0xa0 [] sys_exit_group+0x17/0x20 [] system_call_fastpath+0x16/0x1b Calling arch_flush_lazy_mmu_mode immediately after set_pgd makes the changes visible to the consistency checks. Cc: RedHat-Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=914737 Tested-by: Josh Boyer Reported-and-Tested-by: Krishna Raman Signed-off-by: Samu Kallio Link: http://lkml.kernel.org/r/1364045796-10720-1-git-send-email-konrad.wilk@oracle.com Tested-by: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: H. Peter Anvin --- arch/x86/mm/fault.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 2b97525246d4..0e883364abb5 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -378,10 +378,12 @@ static noinline __kprobes int vmalloc_fault(unsigned long address) if (pgd_none(*pgd_ref)) return -1; - if (pgd_none(*pgd)) + if (pgd_none(*pgd)) { set_pgd(pgd, *pgd_ref); - else + arch_flush_lazy_mmu_mode(); + } else { BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); + } /* * Below here mismatches are bugs because these lower tables From 511ba86e1d386f671084b5d0e6f110bb30b8eeb2 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Sat, 23 Mar 2013 09:36:36 -0400 Subject: [PATCH 162/303] x86, mm: Patch out arch_flush_lazy_mmu_mode() when running on bare metal Invoking arch_flush_lazy_mmu_mode() results in calls to preempt_enable()/disable() which may have performance impact. Since lazy MMU is not used on bare metal we can patch away arch_flush_lazy_mmu_mode() so that it is never called in such environment. [ hpa: the previous patch "Fix vmalloc_fault oops during lazy MMU updates" may cause a minor performance regression on bare metal. This patch resolves that performance regression. It is somewhat unclear to me if this is a good -stable candidate. ] Signed-off-by: Boris Ostrovsky Link: http://lkml.kernel.org/r/1364045796-10720-2-git-send-email-konrad.wilk@oracle.com Tested-by: Josh Boyer Tested-by: Konrad Rzeszutek Wilk Acked-by: Borislav Petkov Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: H. Peter Anvin Cc: SEE NOTE ABOVE --- arch/x86/include/asm/paravirt.h | 5 ++++- arch/x86/include/asm/paravirt_types.h | 2 ++ arch/x86/kernel/paravirt.c | 25 +++++++++++++------------ arch/x86/lguest/boot.c | 1 + arch/x86/xen/mmu.c | 1 + 5 files changed, 21 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 5edd1742cfd0..7361e47db79f 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -703,7 +703,10 @@ static inline void arch_leave_lazy_mmu_mode(void) PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave); } -void arch_flush_lazy_mmu_mode(void); +static inline void arch_flush_lazy_mmu_mode(void) +{ + PVOP_VCALL0(pv_mmu_ops.lazy_mode.flush); +} static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, phys_addr_t phys, pgprot_t flags) diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 142236ed83af..b3b0ec1dac86 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -91,6 +91,7 @@ struct pv_lazy_ops { /* Set deferred update mode, used for batching operations. */ void (*enter)(void); void (*leave)(void); + void (*flush)(void); }; struct pv_time_ops { @@ -679,6 +680,7 @@ void paravirt_end_context_switch(struct task_struct *next); void paravirt_enter_lazy_mmu(void); void paravirt_leave_lazy_mmu(void); +void paravirt_flush_lazy_mmu(void); void _paravirt_nop(void); u32 _paravirt_ident_32(u32); diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 17fff18a1031..8bfb335f74bb 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -263,6 +263,18 @@ void paravirt_leave_lazy_mmu(void) leave_lazy(PARAVIRT_LAZY_MMU); } +void paravirt_flush_lazy_mmu(void) +{ + preempt_disable(); + + if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { + arch_leave_lazy_mmu_mode(); + arch_enter_lazy_mmu_mode(); + } + + preempt_enable(); +} + void paravirt_start_context_switch(struct task_struct *prev) { BUG_ON(preemptible()); @@ -292,18 +304,6 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void) return this_cpu_read(paravirt_lazy_mode); } -void arch_flush_lazy_mmu_mode(void) -{ - preempt_disable(); - - if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { - arch_leave_lazy_mmu_mode(); - arch_enter_lazy_mmu_mode(); - } - - preempt_enable(); -} - struct pv_info pv_info = { .name = "bare hardware", .paravirt_enabled = 0, @@ -475,6 +475,7 @@ struct pv_mmu_ops pv_mmu_ops = { .lazy_mode = { .enter = paravirt_nop, .leave = paravirt_nop, + .flush = paravirt_nop, }, .set_fixmap = native_set_fixmap, diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 1cbd89ca5569..7114c63f047d 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -1334,6 +1334,7 @@ __init void lguest_init(void) pv_mmu_ops.read_cr3 = lguest_read_cr3; pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu; pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mmu_mode; + pv_mmu_ops.lazy_mode.flush = paravirt_flush_lazy_mmu; pv_mmu_ops.pte_update = lguest_pte_update; pv_mmu_ops.pte_update_defer = lguest_pte_update; diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 6afbb2ca9a0a..2f5d6875555e 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2196,6 +2196,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { .lazy_mode = { .enter = paravirt_enter_lazy_mmu, .leave = xen_leave_lazy_mmu, + .flush = paravirt_flush_lazy_mmu, }, .set_fixmap = xen_set_fixmap, From 61a0f6efc8932e9914e1782ff3a027e23c687fc6 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Tue, 9 Apr 2013 23:19:00 +0000 Subject: [PATCH 163/303] e100: Add dma mapping error check e100 uses pci_map_single, but fails to check for a dma mapping error after its use, resulting in a stack trace: [ 46.656594] ------------[ cut here ]------------ [ 46.657004] WARNING: at lib/dma-debug.c:933 check_unmap+0x47b/0x950() [ 46.657004] Hardware name: To Be Filled By O.E.M. [ 46.657004] e100 0000:00:0e.0: DMA-API: device driver failed to check map error[device address=0x000000007a4540fa] [size=90 bytes] [mapped as single] [ 46.657004] Modules linked in: [ 46.657004] w83627hf hwmon_vid snd_via82xx ppdev snd_ac97_codec ac97_bus snd_seq snd_pcm snd_mpu401 snd_mpu401_uart ns558 snd_rawmidi gameport parport_pc e100 snd_seq_device parport snd_page_alloc snd_timer snd soundcore skge shpchp k8temp mii edac_core i2c_viapro edac_mce_amd nfsd auth_rpcgss nfs_acl lockd sunrpc binfmt_misc uinput ata_generic pata_acpi radeon i2c_algo_bit drm_kms_helper ttm firewire_ohci drm firewire_core pata_via sata_via i2c_core sata_promise crc_itu_t [ 46.657004] Pid: 792, comm: ip Not tainted 3.8.0-0.rc6.git0.1.fc19.x86_64 #1 [ 46.657004] Call Trace: [ 46.657004] [] warn_slowpath_common+0x70/0xa0 [ 46.657004] [] warn_slowpath_fmt+0x4c/0x50 [ 46.657004] [] check_unmap+0x47b/0x950 [ 46.657004] [] debug_dma_unmap_page+0x5f/0x70 [ 46.657004] [] ? e100_tx_clean+0x30/0x210 [e100] [ 46.657004] [] e100_tx_clean+0xe8/0x210 [e100] [ 46.657004] [] e100_poll+0x56f/0x6c0 [e100] [ 46.657004] [] ? net_rx_action+0xa1/0x370 [ 46.657004] [] net_rx_action+0x172/0x370 [ 46.657004] [] __do_softirq+0xef/0x3d0 [ 46.657004] [] call_softirq+0x1c/0x30 [ 46.657004] [] do_softirq+0x85/0xc0 [ 46.657004] [] irq_exit+0xd5/0xe0 [ 46.657004] [] do_IRQ+0x56/0xc0 [ 46.657004] [] common_interrupt+0x72/0x72 [ 46.657004] [] ? _raw_spin_unlock_irqrestore+0x3b/0x70 [ 46.657004] [] __slab_free+0x58/0x38b [ 46.657004] [] ? fsnotify_clear_marks_by_inode+0x34/0x120 [ 46.657004] [] ? kmem_cache_free+0x97/0x320 [ 46.657004] [] ? sock_destroy_inode+0x34/0x40 [ 46.657004] [] ? sock_destroy_inode+0x34/0x40 [ 46.657004] [] kmem_cache_free+0x312/0x320 [ 46.657004] [] sock_destroy_inode+0x34/0x40 [ 46.657004] [] destroy_inode+0x38/0x60 [ 46.657004] [] evict+0x10e/0x1a0 [ 46.657004] [] iput+0xf5/0x180 [ 46.657004] [] dput+0x248/0x310 [ 46.657004] [] __fput+0x171/0x240 [ 46.657004] [] ____fput+0xe/0x10 [ 46.657004] [] task_work_run+0xac/0xe0 [ 46.657004] [] do_exit+0x26d/0xc30 [ 46.657004] [] ? finish_task_switch+0x7c/0x120 [ 46.657004] [] ? retint_swapgs+0x13/0x1b [ 46.657004] [] do_group_exit+0x49/0xc0 [ 46.657004] [] sys_exit_group+0x14/0x20 [ 46.657004] [] system_call_fastpath+0x16/0x1b [ 46.657004] ---[ end trace 4468c44e2156e7d1 ]--- [ 46.657004] Mapped at: [ 46.657004] [] debug_dma_map_page+0x91/0x140 [ 46.657004] [] e100_xmit_prepare+0x12b/0x1c0 [e100] [ 46.657004] [] e100_exec_cb+0x84/0x140 [e100] [ 46.657004] [] e100_xmit_frame+0x3a/0x190 [e100] [ 46.657004] [] dev_hard_start_xmit+0x259/0x6c0 Easy fix, modify the cb paramter to e100_exec_cb to return an error, and do the dma_mapping_error check in the obvious place This was reported previously here: http://article.gmane.org/gmane.linux.network/257893 But nobody stepped up and fixed it. CC: Josh Boyer CC: e1000-devel@lists.sourceforge.net Signed-off-by: Neil Horman Reported-by: Michal Jaegermann Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/e100.c | 36 +++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index ec800b093e7e..d2bea3f07c73 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -870,7 +870,7 @@ err_unlock: } static int e100_exec_cb(struct nic *nic, struct sk_buff *skb, - void (*cb_prepare)(struct nic *, struct cb *, struct sk_buff *)) + int (*cb_prepare)(struct nic *, struct cb *, struct sk_buff *)) { struct cb *cb; unsigned long flags; @@ -888,10 +888,13 @@ static int e100_exec_cb(struct nic *nic, struct sk_buff *skb, nic->cbs_avail--; cb->skb = skb; + err = cb_prepare(nic, cb, skb); + if (err) + goto err_unlock; + if (unlikely(!nic->cbs_avail)) err = -ENOSPC; - cb_prepare(nic, cb, skb); /* Order is important otherwise we'll be in a race with h/w: * set S-bit in current first, then clear S-bit in previous. */ @@ -1091,7 +1094,7 @@ static void e100_get_defaults(struct nic *nic) nic->mii.mdio_write = mdio_write; } -static void e100_configure(struct nic *nic, struct cb *cb, struct sk_buff *skb) +static int e100_configure(struct nic *nic, struct cb *cb, struct sk_buff *skb) { struct config *config = &cb->u.config; u8 *c = (u8 *)config; @@ -1181,6 +1184,7 @@ static void e100_configure(struct nic *nic, struct cb *cb, struct sk_buff *skb) netif_printk(nic, hw, KERN_DEBUG, nic->netdev, "[16-23]=%02X:%02X:%02X:%02X:%02X:%02X:%02X:%02X\n", c[16], c[17], c[18], c[19], c[20], c[21], c[22], c[23]); + return 0; } /************************************************************************* @@ -1331,7 +1335,7 @@ static const struct firmware *e100_request_firmware(struct nic *nic) return fw; } -static void e100_setup_ucode(struct nic *nic, struct cb *cb, +static int e100_setup_ucode(struct nic *nic, struct cb *cb, struct sk_buff *skb) { const struct firmware *fw = (void *)skb; @@ -1358,6 +1362,7 @@ static void e100_setup_ucode(struct nic *nic, struct cb *cb, cb->u.ucode[min_size] |= cpu_to_le32((BUNDLESMALL) ? 0xFFFF : 0xFF80); cb->command = cpu_to_le16(cb_ucode | cb_el); + return 0; } static inline int e100_load_ucode_wait(struct nic *nic) @@ -1400,18 +1405,20 @@ static inline int e100_load_ucode_wait(struct nic *nic) return err; } -static void e100_setup_iaaddr(struct nic *nic, struct cb *cb, +static int e100_setup_iaaddr(struct nic *nic, struct cb *cb, struct sk_buff *skb) { cb->command = cpu_to_le16(cb_iaaddr); memcpy(cb->u.iaaddr, nic->netdev->dev_addr, ETH_ALEN); + return 0; } -static void e100_dump(struct nic *nic, struct cb *cb, struct sk_buff *skb) +static int e100_dump(struct nic *nic, struct cb *cb, struct sk_buff *skb) { cb->command = cpu_to_le16(cb_dump); cb->u.dump_buffer_addr = cpu_to_le32(nic->dma_addr + offsetof(struct mem, dump_buf)); + return 0; } static int e100_phy_check_without_mii(struct nic *nic) @@ -1581,7 +1588,7 @@ static int e100_hw_init(struct nic *nic) return 0; } -static void e100_multi(struct nic *nic, struct cb *cb, struct sk_buff *skb) +static int e100_multi(struct nic *nic, struct cb *cb, struct sk_buff *skb) { struct net_device *netdev = nic->netdev; struct netdev_hw_addr *ha; @@ -1596,6 +1603,7 @@ static void e100_multi(struct nic *nic, struct cb *cb, struct sk_buff *skb) memcpy(&cb->u.multi.addr[i++ * ETH_ALEN], &ha->addr, ETH_ALEN); } + return 0; } static void e100_set_multicast_list(struct net_device *netdev) @@ -1756,11 +1764,18 @@ static void e100_watchdog(unsigned long data) round_jiffies(jiffies + E100_WATCHDOG_PERIOD)); } -static void e100_xmit_prepare(struct nic *nic, struct cb *cb, +static int e100_xmit_prepare(struct nic *nic, struct cb *cb, struct sk_buff *skb) { + dma_addr_t dma_addr; cb->command = nic->tx_command; + dma_addr = pci_map_single(nic->pdev, + skb->data, skb->len, PCI_DMA_TODEVICE); + /* If we can't map the skb, have the upper layer try later */ + if (pci_dma_mapping_error(nic->pdev, dma_addr)) + return -ENOMEM; + /* * Use the last 4 bytes of the SKB payload packet as the CRC, used for * testing, ie sending frames with bad CRC. @@ -1777,11 +1792,10 @@ static void e100_xmit_prepare(struct nic *nic, struct cb *cb, cb->u.tcb.tcb_byte_count = 0; cb->u.tcb.threshold = nic->tx_threshold; cb->u.tcb.tbd_count = 1; - cb->u.tcb.tbd.buf_addr = cpu_to_le32(pci_map_single(nic->pdev, - skb->data, skb->len, PCI_DMA_TODEVICE)); - /* check for mapping failure? */ + cb->u.tcb.tbd.buf_addr = cpu_to_le32(dma_addr); cb->u.tcb.tbd.size = cpu_to_le16(skb->len); skb_tx_timestamp(skb); + return 0; } static netdev_tx_t e100_xmit_frame(struct sk_buff *skb, From fea75645342c7ad574214497a78e562db12dfd7b Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Wed, 10 Apr 2013 13:34:39 +0300 Subject: [PATCH 164/303] bnx2x: Prevent null pointer dereference in AFEX mode The cnic module is responsible for initializing various bnx2x structs via callbacks provided by the bnx2x module. One such struct is the queue object for the FCoE queue. If a device is working in AFEX mode and its configuration allows FCoE yet the cnic module is not loaded, it's very likely a null pointer dereference will occur, as the bnx2x will erroneously access the FCoE's queue object. Prevent said access until cnic properly registers itself. Signed-off-by: Yuval Mintz Signed-off-by: Ariel Elior Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index e81a747ea8ce..8e58da909f5c 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -4947,7 +4947,7 @@ static void bnx2x_after_function_update(struct bnx2x *bp) q); } - if (!NO_FCOE(bp)) { + if (!NO_FCOE(bp) && CNIC_ENABLED(bp)) { fp = &bp->fp[FCOE_IDX(bp)]; queue_params.q_obj = &bnx2x_sp_obj(bp, fp).q_obj; @@ -13354,6 +13354,7 @@ static int bnx2x_unregister_cnic(struct net_device *dev) RCU_INIT_POINTER(bp->cnic_ops, NULL); mutex_unlock(&bp->cnic_mutex); synchronize_rcu(); + bp->cnic_enabled = false; kfree(bp->cnic_kwq); bp->cnic_kwq = NULL; From 7b119dc06d871405fc7c3e9a73a6c987409ba639 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 10 Apr 2013 21:38:36 +0200 Subject: [PATCH 165/303] mac80211: fix cfg80211 interaction on auth/assoc request If authentication (or association with FT) is requested by userspace, mac80211 currently doesn't tell cfg80211 that it disconnected from the AP. That leaves inconsistent state: cfg80211 thinks it's connected while mac80211 thinks it's not. Typically this won't last long, as soon as mac80211 reports the new association to cfg80211 the old one goes away. If, however, the new authentication or association doesn't succeed, then cfg80211 will forever think the old one still exists and will refuse attempts to authenticate or associate with the AP it thinks it's connected to. Anders reported that this leads to it taking a very long time to reconnect to a network, or never even succeeding. I tested this with an AP hacked to never respond to auth frames, and one that works, and with just those two the system never recovers because one won't work and cfg80211 thinks it's connected to the other so refuses connections to it. To fix this, simply make mac80211 tell cfg80211 when it is no longer connected to the old AP, while authenticating or associating to a new one. Cc: stable@vger.kernel.org Reported-by: Anders Kaseorg Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 82cc30318a86..346ad4cfb013 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3964,8 +3964,16 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, /* prep auth_data so we don't go into idle on disassoc */ ifmgd->auth_data = auth_data; - if (ifmgd->associated) - ieee80211_set_disassoc(sdata, 0, 0, false, NULL); + if (ifmgd->associated) { + u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; + + ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, + WLAN_REASON_UNSPECIFIED, + false, frame_buf); + + __cfg80211_send_deauth(sdata->dev, frame_buf, + sizeof(frame_buf)); + } sdata_info(sdata, "authenticate with %pM\n", req->bss->bssid); @@ -4025,8 +4033,16 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, mutex_lock(&ifmgd->mtx); - if (ifmgd->associated) - ieee80211_set_disassoc(sdata, 0, 0, false, NULL); + if (ifmgd->associated) { + u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; + + ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, + WLAN_REASON_UNSPECIFIED, + false, frame_buf); + + __cfg80211_send_deauth(sdata->dev, frame_buf, + sizeof(frame_buf)); + } if (ifmgd->auth_data && !ifmgd->auth_data->done) { err = -EBUSY; From 6b07a24fc38476e04f591cc17594bc1835b9efdd Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Wed, 10 Apr 2013 15:34:14 -0400 Subject: [PATCH 166/303] lsm: add the missing documentation for the security_skb_owned_by() hook Unfortunately we didn't catch the missing comments earlier when the patch was merged. Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- include/linux/security.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/security.h b/include/linux/security.h index 6c3a78ace051..032c366ef1c6 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1012,6 +1012,10 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * This hook can be used by the module to update any security state * associated with the TUN device's security structure. * @security pointer to the TUN devices's security structure. + * @skb_owned_by: + * This hook sets the packet's owning sock. + * @skb is the packet. + * @sk the sock which owns the packet. * * Security hooks for XFRM operations. * From c369c9a4a7c82d33329d869cbaf93304cc7a0c40 Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Tue, 9 Apr 2013 18:17:41 +0100 Subject: [PATCH 167/303] cifs: Allow passwords which begin with a delimitor Fixes a regression in cifs_parse_mount_options where a password which begins with a delimitor is parsed incorrectly as being a blank password. Signed-off-by: Sachin Prabhu Acked-by: Jeff Layton Cc: Signed-off-by: Steve French --- fs/cifs/connect.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 991c63c6bdd0..21b3a291c327 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1575,14 +1575,24 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, } break; case Opt_blank_pass: - vol->password = NULL; - break; - case Opt_pass: /* passwords have to be handled differently * to allow the character used for deliminator * to be passed within them */ + /* + * Check if this is a case where the password + * starts with a delimiter + */ + tmp_end = strchr(data, '='); + tmp_end++; + if (!(tmp_end < end && tmp_end[1] == delim)) { + /* No it is not. Set the password to NULL */ + vol->password = NULL; + break; + } + /* Yes it is. Drop down to Opt_pass below.*/ + case Opt_pass: /* Obtain the value string */ value = strchr(data, '='); value++; From c5e6cb051c5f7d56f05bd6a4af22cb300a4ced79 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Mon, 18 Feb 2013 18:13:09 +0000 Subject: [PATCH 168/303] kvm/powerpc/e500mc: fix tlb invalidation on cpu migration The existing check handles the case where we've migrated to a different core than we last ran on, but it doesn't handle the case where we're still on the same cpu we last ran on, but some other vcpu has run on this cpu in the meantime. Without this, guest segfaults (and other misbehavior) have been seen in smp guests. Cc: stable@vger.kernel.org # 3.8.x Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500mc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index 1f89d26e65fb..2f4baa074b2e 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -108,6 +108,8 @@ void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr) { } +static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu_on_cpu); + void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); @@ -136,8 +138,11 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) mtspr(SPRN_GDEAR, vcpu->arch.shared->dar); mtspr(SPRN_GESR, vcpu->arch.shared->esr); - if (vcpu->arch.oldpir != mfspr(SPRN_PIR)) + if (vcpu->arch.oldpir != mfspr(SPRN_PIR) || + __get_cpu_var(last_vcpu_on_cpu) != vcpu) { kvmppc_e500_tlbil_all(vcpu_e500); + __get_cpu_var(last_vcpu_on_cpu) = vcpu; + } kvmppc_load_guest_fp(vcpu); } From f76cfa3c2496c462b5bc01bd0c9340c2715b73ca Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Wed, 10 Apr 2013 15:28:25 +0200 Subject: [PATCH 169/303] x86/mm/cpa: Convert noop to functional fix Commit: a8aed3e0752b ("x86/mm/pageattr: Prevent PSE and GLOABL leftovers to confuse pmd/pte_present and pmd_huge") introduced a valid fix but one location that didn't trigger the bug that lead to finding those (small) problems, wasn't updated using the right variable. The wrong variable was also initialized for no good reason, that may have been the source of the confusion. Remove the noop initialization accordingly. Commit a8aed3e0752b also erroneously removed one canon_pgprot pass meant to clear pmd bitflags not supported in hardware by older CPUs, that automatically gets corrected by this patch too by applying it to the right variable in the new location. Reported-by: Stefan Bader Signed-off-by: Andrea Arcangeli Acked-by: Borislav Petkov Cc: Andy Whitcroft Cc: Mel Gorman Link: http://lkml.kernel.org/r/1365600505-19314-1-git-send-email-aarcange@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 091934e1d0d9..7896f7190fda 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -467,7 +467,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, * We are safe now. Check whether the new pgprot is the same: */ old_pte = *kpte; - old_prot = new_prot = req_prot = pte_pgprot(old_pte); + old_prot = req_prot = pte_pgprot(old_pte); pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr); pgprot_val(req_prot) |= pgprot_val(cpa->mask_set); @@ -478,12 +478,12 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, * a non present pmd. The canon_pgprot will clear _PAGE_GLOBAL * for the ancient hardware that doesn't support it. */ - if (pgprot_val(new_prot) & _PAGE_PRESENT) - pgprot_val(new_prot) |= _PAGE_PSE | _PAGE_GLOBAL; + if (pgprot_val(req_prot) & _PAGE_PRESENT) + pgprot_val(req_prot) |= _PAGE_PSE | _PAGE_GLOBAL; else - pgprot_val(new_prot) &= ~(_PAGE_PSE | _PAGE_GLOBAL); + pgprot_val(req_prot) &= ~(_PAGE_PSE | _PAGE_GLOBAL); - new_prot = canon_pgprot(new_prot); + req_prot = canon_pgprot(req_prot); /* * old_pte points to the large page base address. So we need From f6da51c3ef07c593a410551af4cad093e2fbe755 Mon Sep 17 00:00:00 2001 From: Asias He Date: Wed, 10 Apr 2013 15:06:13 +0800 Subject: [PATCH 170/303] tcm_vhost: Remove double check of response We did the length of response check twice. Signed-off-by: Asias He Acked-by: Michael S. Tsirkin Signed-off-by: Nicholas Bellinger --- drivers/vhost/tcm_vhost.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c index c127731b4230..28c112f23830 100644 --- a/drivers/vhost/tcm_vhost.c +++ b/drivers/vhost/tcm_vhost.c @@ -705,15 +705,6 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, tv_cmd->tvc_vhost = vs; tv_cmd->tvc_vq = vq; - - if (unlikely(vq->iov[out].iov_len != - sizeof(struct virtio_scsi_cmd_resp))) { - vq_err(vq, "Expecting virtio_scsi_cmd_resp, got %zu" - " bytes, out: %d, in: %d\n", - vq->iov[out].iov_len, out, in); - break; - } - tv_cmd->tvc_resp = vq->iov[out].iov_base; /* From 7ea206cf3b06704cf2bfc9c9c395094b24dc44a2 Mon Sep 17 00:00:00 2001 From: Asias He Date: Wed, 10 Apr 2013 15:06:14 +0800 Subject: [PATCH 171/303] tcm_vhost: Fix tv_cmd leak in vhost_scsi_handle_vq If we fail to submit the allocated tv_vmd to tcm_vhost_submission_work, we will leak the tv_vmd. Free tv_vmd on fail path. Signed-off-by: Asias He Acked-by: Michael S. Tsirkin Signed-off-by: Nicholas Bellinger --- drivers/vhost/tcm_vhost.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c index 28c112f23830..210d59e222fc 100644 --- a/drivers/vhost/tcm_vhost.c +++ b/drivers/vhost/tcm_vhost.c @@ -724,7 +724,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, " exceeds SCSI_MAX_VARLEN_CDB_SIZE: %d\n", scsi_command_size(tv_cmd->tvc_cdb), TCM_VHOST_MAX_CDB_SIZE); - break; /* TODO */ + goto err; } tv_cmd->tvc_lun = ((v_req.lun[2] << 8) | v_req.lun[3]) & 0x3FFF; @@ -737,7 +737,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, data_direction == DMA_TO_DEVICE); if (unlikely(ret)) { vq_err(vq, "Failed to map iov to sgl\n"); - break; /* TODO */ + goto err; } } @@ -758,6 +758,11 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, } mutex_unlock(&vq->mutex); + return; + +err: + vhost_scsi_free_cmd(tv_cmd); + mutex_unlock(&vq->mutex); } static void vhost_scsi_ctl_handle_kick(struct vhost_work *work) From 637ab21e284f4e9188acc2746695f7f6452b4a22 Mon Sep 17 00:00:00 2001 From: Asias He Date: Wed, 10 Apr 2013 15:06:15 +0800 Subject: [PATCH 172/303] tcm_vhost: Add vhost_scsi_send_bad_target() helper Share the send bad target code with other use cases. Signed-off-by: Asias He Acked-by: Michael S. Tsirkin Signed-off-by: Nicholas Bellinger --- drivers/vhost/tcm_vhost.c | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c index 210d59e222fc..1bb0fb4950d1 100644 --- a/drivers/vhost/tcm_vhost.c +++ b/drivers/vhost/tcm_vhost.c @@ -578,6 +578,23 @@ static void tcm_vhost_submission_work(struct work_struct *work) } } +static void vhost_scsi_send_bad_target(struct vhost_scsi *vs, + struct vhost_virtqueue *vq, int head, unsigned out) +{ + struct virtio_scsi_cmd_resp __user *resp; + struct virtio_scsi_cmd_resp rsp; + int ret; + + memset(&rsp, 0, sizeof(rsp)); + rsp.response = VIRTIO_SCSI_S_BAD_TARGET; + resp = vq->iov[out].iov_base; + ret = __copy_to_user(resp, &rsp, sizeof(rsp)); + if (!ret) + vhost_add_used_and_signal(&vs->dev, vq, head, 0); + else + pr_err("Faulted on virtio_scsi_cmd_resp\n"); +} + static void vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) { @@ -673,19 +690,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, /* Target does not exist, fail the request */ if (unlikely(!tv_tpg)) { - struct virtio_scsi_cmd_resp __user *resp; - struct virtio_scsi_cmd_resp rsp; - - memset(&rsp, 0, sizeof(rsp)); - rsp.response = VIRTIO_SCSI_S_BAD_TARGET; - resp = vq->iov[out].iov_base; - ret = __copy_to_user(resp, &rsp, sizeof(rsp)); - if (!ret) - vhost_add_used_and_signal(&vs->dev, - vq, head, 0); - else - pr_err("Faulted on virtio_scsi_cmd_resp\n"); - + vhost_scsi_send_bad_target(vs, vq, head, out); continue; } From 055f648c49d948de9452b0f1758869bbbd0097b8 Mon Sep 17 00:00:00 2001 From: Asias He Date: Wed, 10 Apr 2013 15:06:16 +0800 Subject: [PATCH 173/303] tcm_vhost: Send bad target to guest when cmd fails Send bad target to guest in case: 1) we can not allocate the cmd 2) fail to submit the cmd Signed-off-by: Asias He Acked-by: Michael S. Tsirkin Signed-off-by: Nicholas Bellinger --- drivers/vhost/tcm_vhost.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c index 1bb0fb4950d1..957a0b98a5d9 100644 --- a/drivers/vhost/tcm_vhost.c +++ b/drivers/vhost/tcm_vhost.c @@ -703,7 +703,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, if (IS_ERR(tv_cmd)) { vq_err(vq, "vhost_scsi_allocate_cmd failed %ld\n", PTR_ERR(tv_cmd)); - break; + goto err_cmd; } pr_debug("Allocated tv_cmd: %p exp_data_len: %d, data_direction" ": %d\n", tv_cmd, exp_data_len, data_direction); @@ -729,7 +729,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, " exceeds SCSI_MAX_VARLEN_CDB_SIZE: %d\n", scsi_command_size(tv_cmd->tvc_cdb), TCM_VHOST_MAX_CDB_SIZE); - goto err; + goto err_free; } tv_cmd->tvc_lun = ((v_req.lun[2] << 8) | v_req.lun[3]) & 0x3FFF; @@ -742,7 +742,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, data_direction == DMA_TO_DEVICE); if (unlikely(ret)) { vq_err(vq, "Failed to map iov to sgl\n"); - goto err; + goto err_free; } } @@ -765,8 +765,10 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, mutex_unlock(&vq->mutex); return; -err: +err_free: vhost_scsi_free_cmd(tv_cmd); +err_cmd: + vhost_scsi_send_bad_target(vs, vq, head, out); mutex_unlock(&vq->mutex); } From 30f359a6f9da65a66de8cadf959f0f4a0d498bba Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 10 Apr 2013 15:00:27 -0700 Subject: [PATCH 174/303] target: Fix incorrect fallthrough of ALUA Standby/Offline/Transition CDBs This patch fixes a bug where a handful of informational / control CDBs that should be allowed during ALUA access state Standby/Offline/Transition where incorrectly returning CHECK_CONDITION + ASCQ_04H_ALUA_TG_PT_*. This includes INQUIRY + REPORT_LUNS, which would end up preventing LUN registration when LUN scanning occured during these ALUA access states. Cc: Hannes Reinecke Cc: Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_alua.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index ff1c5ee352cb..cbe48ab41745 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -409,6 +409,7 @@ static inline int core_alua_state_standby( case REPORT_LUNS: case RECEIVE_DIAGNOSTIC: case SEND_DIAGNOSTIC: + return 0; case MAINTENANCE_IN: switch (cdb[1] & 0x1f) { case MI_REPORT_TARGET_PGS: @@ -451,6 +452,7 @@ static inline int core_alua_state_unavailable( switch (cdb[0]) { case INQUIRY: case REPORT_LUNS: + return 0; case MAINTENANCE_IN: switch (cdb[1] & 0x1f) { case MI_REPORT_TARGET_PGS: @@ -491,6 +493,7 @@ static inline int core_alua_state_transition( switch (cdb[0]) { case INQUIRY: case REPORT_LUNS: + return 0; case MAINTENANCE_IN: switch (cdb[1] & 0x1f) { case MI_REPORT_TARGET_PGS: From 6b2ba1a9129bd98b5268a4efb167c95189b3eabf Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Wed, 13 Feb 2013 19:37:48 +0000 Subject: [PATCH 175/303] kvm/ppc/e500: h2g_tlb1_rmap: esel 0 is valid Add one to esel values in h2g_tlb1_rmap, so that "no mapping" can be distinguished from "esel 0". Note that we're not saved by the fact that host esel 0 is reserved for non-KVM use, because KVM host esel numbering is not the raw host numbering (see to_htlb1_esel). Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500_mmu_host.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index a222edfb9a9b..35fb80ec1f57 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -511,10 +511,10 @@ static int kvmppc_e500_tlb1_map_tlb1(struct kvmppc_vcpu_e500 *vcpu_e500, vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << sesel; vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP; if (vcpu_e500->h2g_tlb1_rmap[sesel]) { - unsigned int idx = vcpu_e500->h2g_tlb1_rmap[sesel]; + unsigned int idx = vcpu_e500->h2g_tlb1_rmap[sesel] - 1; vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << sesel); } - vcpu_e500->h2g_tlb1_rmap[sesel] = esel; + vcpu_e500->h2g_tlb1_rmap[sesel] = esel + 1; return sesel; } From 66a5fecdccd4f32a22d2d110cf4f002755b520d8 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Wed, 13 Feb 2013 19:37:49 +0000 Subject: [PATCH 176/303] kvm/ppc/e500: g2h_tlb1_map: clear old bit before setting new bit It's possible that we're using the same host TLB1 slot to map (a presumably different portion of) the same guest TLB1 entry. Clear the bit in the map before setting it, so that if the esels are the same the bit will remain set. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500_mmu_host.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 35fb80ec1f57..8e72b2124f63 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -507,13 +507,14 @@ static int kvmppc_e500_tlb1_map_tlb1(struct kvmppc_vcpu_e500 *vcpu_e500, if (unlikely(vcpu_e500->host_tlb1_nv >= tlb1_max_shadow_size())) vcpu_e500->host_tlb1_nv = 0; - vcpu_e500->tlb_refs[1][sesel] = *ref; - vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << sesel; - vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP; if (vcpu_e500->h2g_tlb1_rmap[sesel]) { unsigned int idx = vcpu_e500->h2g_tlb1_rmap[sesel] - 1; vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << sesel); } + + vcpu_e500->tlb_refs[1][sesel] = *ref; + vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP; + vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << sesel; vcpu_e500->h2g_tlb1_rmap[sesel] = esel + 1; return sesel; From 4d2be6f7c75e814ee28b007dbf0c26dfcbbe20a1 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Wed, 6 Mar 2013 16:02:49 +0000 Subject: [PATCH 177/303] kvm/ppc/e500: eliminate tlb_refs Commit 523f0e5421c12610527c620b983b443f329e3a32 ("KVM: PPC: E500: Explicitly mark shadow maps invalid") began using E500_TLB_VALID for guest TLB1 entries, and skipping invalidations if it's not set. However, when E500_TLB_VALID was set for such entries, it was on a fake local ref, and so the invalidations never happen. gtlb_privs is documented as being only for guest TLB0, though we already violate that with E500_TLB_BITMAP. Now that we have MMU notifiers, and thus don't need to actually retain a reference to the mapped pages, get rid of tlb_refs, and use gtlb_privs for E500_TLB_VALID in TLB1. Since we can have more than one host TLB entry for a given tlbe_ref, be careful not to clear existing flags that are relevant to other host TLB entries when preparing a new host TLB entry. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500.h | 24 ++++------ arch/powerpc/kvm/e500_mmu_host.c | 75 ++++++++++---------------------- 2 files changed, 30 insertions(+), 69 deletions(-) diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index 41cefd43655f..33db48a8ce24 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -26,17 +26,20 @@ #define E500_PID_NUM 3 #define E500_TLB_NUM 2 -#define E500_TLB_VALID 1 -#define E500_TLB_BITMAP 2 +/* entry is mapped somewhere in host TLB */ +#define E500_TLB_VALID (1 << 0) +/* TLB1 entry is mapped by host TLB1, tracked by bitmaps */ +#define E500_TLB_BITMAP (1 << 1) +/* TLB1 entry is mapped by host TLB0 */ #define E500_TLB_TLB0 (1 << 2) struct tlbe_ref { - pfn_t pfn; - unsigned int flags; /* E500_TLB_* */ + pfn_t pfn; /* valid only for TLB0, except briefly */ + unsigned int flags; /* E500_TLB_* */ }; struct tlbe_priv { - struct tlbe_ref ref; /* TLB0 only -- TLB1 uses tlb_refs */ + struct tlbe_ref ref; }; #ifdef CONFIG_KVM_E500V2 @@ -63,17 +66,6 @@ struct kvmppc_vcpu_e500 { unsigned int gtlb_nv[E500_TLB_NUM]; - /* - * information associated with each host TLB entry -- - * TLB1 only for now. If/when guest TLB1 entries can be - * mapped with host TLB0, this will be used for that too. - * - * We don't want to use this for guest TLB0 because then we'd - * have the overhead of doing the translation again even if - * the entry is still in the guest TLB (e.g. we swapped out - * and back, and our host TLB entries got evicted). - */ - struct tlbe_ref *tlb_refs[E500_TLB_NUM]; unsigned int host_tlb1_nv; u32 svr; diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 8e72b2124f63..1c6a9d729df4 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -193,8 +193,11 @@ void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, struct tlbe_ref *ref = &vcpu_e500->gtlb_priv[tlbsel][esel].ref; /* Don't bother with unmapped entries */ - if (!(ref->flags & E500_TLB_VALID)) - return; + if (!(ref->flags & E500_TLB_VALID)) { + WARN(ref->flags & (E500_TLB_BITMAP | E500_TLB_TLB0), + "%s: flags %x\n", __func__, ref->flags); + WARN_ON(tlbsel == 1 && vcpu_e500->g2h_tlb1_map[esel]); + } if (tlbsel == 1 && ref->flags & E500_TLB_BITMAP) { u64 tmp = vcpu_e500->g2h_tlb1_map[esel]; @@ -248,7 +251,7 @@ static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref, pfn_t pfn) { ref->pfn = pfn; - ref->flags = E500_TLB_VALID; + ref->flags |= E500_TLB_VALID; if (tlbe_is_writable(gtlbe)) kvm_set_pfn_dirty(pfn); @@ -257,6 +260,7 @@ static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref, static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref) { if (ref->flags & E500_TLB_VALID) { + /* FIXME: don't log bogus pfn for TLB1 */ trace_kvm_booke206_ref_release(ref->pfn, ref->flags); ref->flags = 0; } @@ -274,36 +278,23 @@ static void clear_tlb1_bitmap(struct kvmppc_vcpu_e500 *vcpu_e500) static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500) { - int tlbsel = 0; + int tlbsel; int i; - for (i = 0; i < vcpu_e500->gtlb_params[tlbsel].entries; i++) { - struct tlbe_ref *ref = - &vcpu_e500->gtlb_priv[tlbsel][i].ref; - kvmppc_e500_ref_release(ref); + for (tlbsel = 0; tlbsel <= 1; tlbsel++) { + for (i = 0; i < vcpu_e500->gtlb_params[tlbsel].entries; i++) { + struct tlbe_ref *ref = + &vcpu_e500->gtlb_priv[tlbsel][i].ref; + kvmppc_e500_ref_release(ref); + } } } -static void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500) -{ - int stlbsel = 1; - int i; - - kvmppc_e500_tlbil_all(vcpu_e500); - - for (i = 0; i < host_tlb_params[stlbsel].entries; i++) { - struct tlbe_ref *ref = - &vcpu_e500->tlb_refs[stlbsel][i]; - kvmppc_e500_ref_release(ref); - } - - clear_tlb_privs(vcpu_e500); -} - void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - clear_tlb_refs(vcpu_e500); + kvmppc_e500_tlbil_all(vcpu_e500); + clear_tlb_privs(vcpu_e500); clear_tlb1_bitmap(vcpu_e500); } @@ -458,8 +449,6 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1); } - /* Drop old ref and setup new one. */ - kvmppc_e500_ref_release(ref); kvmppc_e500_ref_setup(ref, gtlbe, pfn); kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize, @@ -512,10 +501,10 @@ static int kvmppc_e500_tlb1_map_tlb1(struct kvmppc_vcpu_e500 *vcpu_e500, vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << sesel); } - vcpu_e500->tlb_refs[1][sesel] = *ref; vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP; vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << sesel; vcpu_e500->h2g_tlb1_rmap[sesel] = esel + 1; + WARN_ON(!(ref->flags & E500_TLB_VALID)); return sesel; } @@ -527,13 +516,12 @@ static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe, struct kvm_book3e_206_tlb_entry *stlbe, int esel) { - struct tlbe_ref ref; + struct tlbe_ref *ref = &vcpu_e500->gtlb_priv[1][esel].ref; int sesel; int r; - ref.flags = 0; r = kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, stlbe, - &ref); + ref); if (r) return r; @@ -545,7 +533,7 @@ static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, } /* Otherwise map into TLB1 */ - sesel = kvmppc_e500_tlb1_map_tlb1(vcpu_e500, &ref, esel); + sesel = kvmppc_e500_tlb1_map_tlb1(vcpu_e500, ref, esel); write_stlbe(vcpu_e500, gtlbe, stlbe, 1, sesel); return 0; @@ -566,7 +554,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, case 0: priv = &vcpu_e500->gtlb_priv[tlbsel][esel]; - /* Triggers after clear_tlb_refs or on initial mapping */ + /* Triggers after clear_tlb_privs or on initial mapping */ if (!(priv->ref.flags & E500_TLB_VALID)) { kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe); } else { @@ -666,35 +654,16 @@ int e500_mmu_host_init(struct kvmppc_vcpu_e500 *vcpu_e500) host_tlb_params[0].entries / host_tlb_params[0].ways; host_tlb_params[1].sets = 1; - vcpu_e500->tlb_refs[0] = - kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[0].entries, - GFP_KERNEL); - if (!vcpu_e500->tlb_refs[0]) - goto err; - - vcpu_e500->tlb_refs[1] = - kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[1].entries, - GFP_KERNEL); - if (!vcpu_e500->tlb_refs[1]) - goto err; - vcpu_e500->h2g_tlb1_rmap = kzalloc(sizeof(unsigned int) * host_tlb_params[1].entries, GFP_KERNEL); if (!vcpu_e500->h2g_tlb1_rmap) - goto err; + return -EINVAL; return 0; - -err: - kfree(vcpu_e500->tlb_refs[0]); - kfree(vcpu_e500->tlb_refs[1]); - return -EINVAL; } void e500_mmu_host_uninit(struct kvmppc_vcpu_e500 *vcpu_e500) { kfree(vcpu_e500->h2g_tlb1_rmap); - kfree(vcpu_e500->tlb_refs[0]); - kfree(vcpu_e500->tlb_refs[1]); } From 7791c8423f1f7f4dad94e753bae67461d5b80be8 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Wed, 10 Apr 2013 10:59:34 +0200 Subject: [PATCH 178/303] x86,efi: Check max_size only if it is non-zero. Some EFI implementations return always a MaximumVariableSize of 0, check against max_size only if it is non-zero. My Intel DQ67SW desktop board has such an implementation. Signed-off-by: Richard Weinberger Signed-off-by: Matt Fleming --- arch/x86/platform/efi/efi.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index c89c245eff40..3f96a487aa2a 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -1018,7 +1018,12 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) if (status != EFI_SUCCESS) return status; - if (!storage_size || size > remaining_size || size > max_size || + if (!max_size && remaining_size > size) + printk_once(KERN_ERR FW_BUG "Broken EFI implementation" + " is returning MaxVariableSize=0\n"); + + if (!storage_size || size > remaining_size || + (max_size && size > max_size) || (remaining_size - size) < (storage_size / 2)) return EFI_OUT_OF_RESOURCES; From e7a7f972f9120a7985d38a2be8c540047c94392f Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Wed, 3 Apr 2013 19:40:14 +0200 Subject: [PATCH 179/303] gpio: pca953x: fix irq_domain_add_simple usage We actually have to pass chip as the host_data parameter of irq_domain_add_simple() as later on, it is used to initialize chip_data in pca953x_gpio_irq_map(). Failing to do so is leading to a NULL pointer dereference after calling irq_data_get_irq_chip_data() in pca953x_irq_mask(), pca953x_irq_unmask(), pca953x_irq_bus_lock(), pca953x_irq_bus_sync_unlock() and pca953x_irq_set_type(). Fixes regression introduced by commit 0e8f2fdacf1d44651aa7e57063c76142d1f4988b (gpio: pca953x: use simple irqdomain) Signed-off-by: Alexandre Belloni Acked-by: Maxime Ripard Signed-off-by: Linus Walleij --- drivers/gpio/gpio-pca953x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 24059462c87f..9391cf16e990 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -575,7 +575,7 @@ static int pca953x_irq_setup(struct pca953x_chip *chip, chip->gpio_chip.ngpio, irq_base, &pca953x_irq_simple_ops, - NULL); + chip); if (!chip->domain) return -ENODEV; From d66954a066158781ccf9c13c91d0316970fe57b6 Mon Sep 17 00:00:00 2001 From: Dmitry Popov Date: Thu, 11 Apr 2013 08:55:07 +0000 Subject: [PATCH 180/303] tcp: incoming connections might use wrong route under synflood There is a bug in cookie_v4_check (net/ipv4/syncookies.c): flowi4_init_output(&fl4, 0, sk->sk_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), (opt && opt->srr) ? opt->faddr : ireq->rmt_addr, ireq->loc_addr, th->source, th->dest); Here we do not respect sk->sk_bound_dev_if, therefore wrong dst_entry may be taken. This dst_entry is used by new socket (get_cookie_sock -> tcp_v4_syn_recv_sock), so its packets may take the wrong path. Signed-off-by: Dmitry Popov Signed-off-by: David S. Miller --- net/ipv4/syncookies.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index ef54377fb11c..397e0f69435f 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -349,8 +349,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, * hasn't changed since we received the original syn, but I see * no easy way to do this. */ - flowi4_init_output(&fl4, 0, sk->sk_mark, RT_CONN_FLAGS(sk), - RT_SCOPE_UNIVERSE, IPPROTO_TCP, + flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark, + RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), (opt && opt->srr) ? opt->faddr : ireq->rmt_addr, ireq->loc_addr, th->source, th->dest); From 6101391d4a381cc0c661d8765235b3cad7da09e5 Mon Sep 17 00:00:00 2001 From: "nikolay@redhat.com" Date: Thu, 11 Apr 2013 09:18:55 +0000 Subject: [PATCH 181/303] bonding: fix netdev event NULL pointer dereference In commit 471cb5a33dcbd7c529684a2ac7ba4451414ee4a7 ("bonding: remove usage of dev->master") a bug was introduced which causes a NULL pointer dereference. If a bond device is in mode 6 (ALB) and a slave is added it will dereference a NULL pointer in bond_slave_netdev_event(). This is because in bond_enslave we have bond_alb_init_slave() which changes the MAC address of the slave and causes a NETDEV_CHANGEADDR. Then we have in bond_slave_netdev_event(): struct slave *slave = bond_slave_get_rtnl(slave_dev); struct bonding *bond = slave->bond; bond_slave_get_rtnl() dereferences slave_dev->rx_handler_data which at that time is NULL since netdev_rx_handler_register() is called later. This is fixed by checking if slave is NULL before dereferencing it. v2: Comment style changed. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 07401a3e256b..e074c6b55517 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3168,11 +3168,20 @@ static int bond_slave_netdev_event(unsigned long event, struct net_device *slave_dev) { struct slave *slave = bond_slave_get_rtnl(slave_dev); - struct bonding *bond = slave->bond; - struct net_device *bond_dev = slave->bond->dev; + struct bonding *bond; + struct net_device *bond_dev; u32 old_speed; u8 old_duplex; + /* A netdev event can be generated while enslaving a device + * before netdev_rx_handler_register is called in which case + * slave will be NULL + */ + if (!slave) + return NOTIFY_DONE; + bond_dev = slave->bond->dev; + bond = slave->bond; + switch (event) { case NETDEV_UNREGISTER: if (bond->setup_by_slave) From b6a5a7b9a528a8b4c8bec940b607c5dd9102b8cc Mon Sep 17 00:00:00 2001 From: "nikolay@redhat.com" Date: Thu, 11 Apr 2013 09:18:56 +0000 Subject: [PATCH 182/303] bonding: IFF_BONDING is not stripped on enslave failure While enslaving a new device and after IFF_BONDING flag is set, in case of failure it is not stripped from the device's priv_flags while cleaning up, which could lead to other problems. Cleaning at err_close because the flag is set after dev_open(). v2: no change Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index e074c6b55517..a61a760484f7 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1906,6 +1906,7 @@ err_detach: write_unlock_bh(&bond->lock); err_close: + slave_dev->priv_flags &= ~IFF_BONDING; dev_close(slave_dev); err_unset_master: From 7078b984223e0e0e17ecbf32bd05ef1a927c2788 Mon Sep 17 00:00:00 2001 From: Christian Ruppert Date: Tue, 9 Apr 2013 21:56:04 +0000 Subject: [PATCH 183/303] stmmac: prevent interrupt loop with MMC RX IPC Counter If the DesignWare MAC is synthesised with MMC RX IPC Counter, an unmanaged and unacknowledged interrupt is generated after some time of operation. This patch masks the undesired interrupts. Signed-off-by: Christian Ruppert Acked-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/mmc_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c index 0c74a702d461..50617c5a0bdb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c @@ -149,6 +149,7 @@ void dwmac_mmc_intr_all_mask(void __iomem *ioaddr) { writel(MMC_DEFAULT_MASK, ioaddr + MMC_RX_INTR_MASK); writel(MMC_DEFAULT_MASK, ioaddr + MMC_TX_INTR_MASK); + writel(MMC_DEFAULT_MASK, ioaddr + MMC_RX_IPC_INTR_MASK); } /* This reads the MAC core counters (if actaully supported). From 5a25bf36b7394f11f2ff90c9e203639cd3dc041a Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 2 Dec 2012 08:33:24 -0500 Subject: [PATCH 184/303] [SCSI] lpfc: fix potential NULL pointer dereference in lpfc_sli4_rq_put() The dereference to 'put_index' should be moved below the NULL test. Signed-off-by: Wei Yongjun Acked-by: James Smart Signed-off-by: James Bottomley --- drivers/scsi/lpfc/lpfc_sli.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 74b67d98e952..d43faf34c1e2 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -438,11 +438,12 @@ lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq, struct lpfc_rqe *temp_hrqe; struct lpfc_rqe *temp_drqe; struct lpfc_register doorbell; - int put_index = hq->host_index; + int put_index; /* sanity check on queue memory */ if (unlikely(!hq) || unlikely(!dq)) return -ENOMEM; + put_index = hq->host_index; temp_hrqe = hq->qe[hq->host_index].rqe; temp_drqe = dq->qe[dq->host_index].rqe; From 50bceae9bd3569d56744882f3012734d48a1d413 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 11 Apr 2013 10:57:18 +0000 Subject: [PATCH 185/303] tcp: Reallocate headroom if it would overflow csum_start If a TCP retransmission gets partially ACKed and collapsed multiple times it is possible for the headroom to grow beyond 64K which will overflow the 16bit skb->csum_start which is based on the start of the headroom. It has been observed rarely in the wild with IPoIB due to the 64K MTU. Verify if the acking and collapsing resulted in a headroom exceeding what csum_start can cover and reallocate the headroom if so. A big thank you to Jim Foraker and the team at LLNL for helping out with the investigation and testing. Reported-by: Jim Foraker Signed-off-by: Thomas Graf Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b44cf81d8178..509912a5ff98 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2388,8 +2388,12 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) */ TCP_SKB_CB(skb)->when = tcp_time_stamp; - /* make sure skb->data is aligned on arches that require it */ - if (unlikely(NET_IP_ALIGN && ((unsigned long)skb->data & 3))) { + /* make sure skb->data is aligned on arches that require it + * and check if ack-trimming & collapsing extended the headroom + * beyond what csum_start can cover. + */ + if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) || + skb_headroom(skb) >= 0xFFFF)) { struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); return nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : From 1baee58638fc58248625255f5c5fcdb987f11b1f Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 12 Apr 2013 13:25:20 +1000 Subject: [PATCH 186/303] udl: handle EDID failure properly. Don't oops seems proper. Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/udl/udl_connector.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/udl/udl_connector.c b/drivers/gpu/drm/udl/udl_connector.c index fe5cdbcf2636..b44d548c56f8 100644 --- a/drivers/gpu/drm/udl/udl_connector.c +++ b/drivers/gpu/drm/udl/udl_connector.c @@ -61,6 +61,10 @@ static int udl_get_modes(struct drm_connector *connector) int ret; edid = (struct edid *)udl_get_edid(udl); + if (!edid) { + drm_mode_connector_update_edid_property(connector, NULL); + return 0; + } /* * We only read the main block, but if the monitor reports extension From 89ced125472b8551c65526934b7f6c733a6864fa Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 11 Apr 2013 14:26:55 +0000 Subject: [PATCH 187/303] drm/fb-helper: Fix locking in drm_fb_helper_hotplug_event Driver's and ->fill_modes functions are allowed to grab crtc mutexes (for e.g. load detect). Hence we need to first only grab the general kms mutex, and only in a second step grab all locks to do the modesets. This prevents a deadlock on my gm45 in the tv load detect code called by drm_helper_probe_single_connector_modes. Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_fb_helper.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 59d6b9bf204b..892ff9f95975 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -1544,10 +1544,10 @@ int drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper) if (!fb_helper->fb) return 0; - drm_modeset_lock_all(dev); + mutex_lock(&fb_helper->dev->mode_config.mutex); if (!drm_fb_helper_is_bound(fb_helper)) { fb_helper->delayed_hotplug = true; - drm_modeset_unlock_all(dev); + mutex_unlock(&fb_helper->dev->mode_config.mutex); return 0; } DRM_DEBUG_KMS("\n"); @@ -1558,9 +1558,11 @@ int drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper) count = drm_fb_helper_probe_connector_modes(fb_helper, max_width, max_height); + mutex_unlock(&fb_helper->dev->mode_config.mutex); + + drm_modeset_lock_all(dev); drm_setup_crtcs(fb_helper); drm_modeset_unlock_all(dev); - drm_fb_helper_set_par(fb_helper->fbdev); return 0; From c481420248c6730246d2a1b1773d5d7007ae0835 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 12 Apr 2013 11:05:54 +0800 Subject: [PATCH 188/303] perf: Fix error return code Fix to return -ENOMEM in the allocation error case instead of 0 (if pmu_bus_running == 1), as done elsewhere in this function. Signed-off-by: Wei Yongjun Cc: a.p.zijlstra@chello.nl Cc: paulus@samba.org Cc: acme@ghostprotocols.net Link: http://lkml.kernel.org/r/CAPgLHd8j_fWcgqe%3DKLWjpBj%2B%3Do0Pw6Z-SEq%3DNTPU08c2w1tngQ@mail.gmail.com [ Tweaked the error code setting placement and the changelog. ] Signed-off-by: Ingo Molnar --- kernel/events/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index 7f0d67ea3f48..7e0962ed7f8a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5987,6 +5987,7 @@ skip_type: if (pmu->pmu_cpu_context) goto got_cpu_context; + ret = -ENOMEM; pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context); if (!pmu->pmu_cpu_context) goto free_dev; From 18699739b60cb60230153ff5475b2ba92be185f9 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 11 Apr 2013 15:36:09 +0200 Subject: [PATCH 189/303] x86/mm/cpa/selftest: Fix false positive in CPA self test If the pmd is not present, _PAGE_PSE will not be set anymore. Fix the false positive. Reported-by: Ingo Molnar Signed-off-by: Andrea Arcangeli Cc: Stefan Bader Cc: Andy Whitcroft Cc: Mel Gorman Cc: Borislav Petkov Link: http://lkml.kernel.org/r/1365687369-30802-1-git-send-email-aarcange@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr-test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c index b0086567271c..0e38951e65eb 100644 --- a/arch/x86/mm/pageattr-test.c +++ b/arch/x86/mm/pageattr-test.c @@ -68,7 +68,7 @@ static int print_split(struct split_state *s) s->gpg++; i += GPS/PAGE_SIZE; } else if (level == PG_LEVEL_2M) { - if (!(pte_val(*pte) & _PAGE_PSE)) { + if ((pte_val(*pte) & _PAGE_PRESENT) && !(pte_val(*pte) & _PAGE_PSE)) { printk(KERN_ERR "%lx level %d but not PSE %Lx\n", addr, level, (u64)pte_val(*pte)); From 26564600c9e88c6572a5e6ef5ae9121907edfb7f Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Thu, 11 Apr 2013 13:59:52 -0400 Subject: [PATCH 190/303] x86/mm: Flush lazy MMU when DEBUG_PAGEALLOC is set When CONFIG_DEBUG_PAGEALLOC is set page table updates made by kernel_map_pages() are not made visible (via TLB flush) immediately if lazy MMU is on. In environments that support lazy MMU (e.g. Xen) this may lead to fatal page faults, for example, when zap_pte_range() needs to allocate pages in __tlb_remove_page() -> tlb_next_batch(). Signed-off-by: Boris Ostrovsky Cc: konrad.wilk@oracle.com Link: http://lkml.kernel.org/r/1365703192-2089-1-git-send-email-boris.ostrovsky@oracle.com Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 7896f7190fda..fb4e73ec24d8 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1413,6 +1413,8 @@ void kernel_map_pages(struct page *page, int numpages, int enable) * but that can deadlock->flush only current cpu: */ __flush_tlb_all(); + + arch_flush_lazy_mmu_mode(); } #ifdef CONFIG_HIBERNATION From c2d421e171868586939c328dfb91bab840fe4c49 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 11 Apr 2013 04:22:39 +0000 Subject: [PATCH 191/303] netfilter: nf_nat: fix race when unloading protocol modules following oops was reported: RIP: 0010:[] [] nf_nat_cleanup_conntrack+0x42/0x70 [nf_nat] RSP: 0018:ffff880202c63d40 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff8801ac7bec28 RCX: ffff8801d0eedbe0 RDX: dead000000200200 RSI: 0000000000000011 RDI: ffffffffa03265b8 [..] Call Trace: [..] [] destroy_conntrack+0xbd/0x110 [nf_conntrack] Happens when a conntrack timeout expires right after first part of the nat cleanup has completed (bysrc hash removal), but before part 2 has completed (re-initialization of nat area). [ destroy callback tries to delete bysrc again ] Patrick suggested to just remove the affected conntracks -- the connections won't work properly anyway without nat transformation. So, lets do that. Reported-by: CAI Qian Cc: Patrick McHardy Signed-off-by: Florian Westphal Acked-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_core.c | 40 +++++++------------------------------ 1 file changed, 7 insertions(+), 33 deletions(-) diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 8d5769c6d16e..ad24be070e53 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -467,33 +467,22 @@ EXPORT_SYMBOL_GPL(nf_nat_packet); struct nf_nat_proto_clean { u8 l3proto; u8 l4proto; - bool hash; }; -/* Clear NAT section of all conntracks, in case we're loaded again. */ -static int nf_nat_proto_clean(struct nf_conn *i, void *data) +/* kill conntracks with affected NAT section */ +static int nf_nat_proto_remove(struct nf_conn *i, void *data) { const struct nf_nat_proto_clean *clean = data; struct nf_conn_nat *nat = nfct_nat(i); if (!nat) return 0; - if (!(i->status & IPS_SRC_NAT_DONE)) - return 0; + if ((clean->l3proto && nf_ct_l3num(i) != clean->l3proto) || (clean->l4proto && nf_ct_protonum(i) != clean->l4proto)) return 0; - if (clean->hash) { - spin_lock_bh(&nf_nat_lock); - hlist_del_rcu(&nat->bysource); - spin_unlock_bh(&nf_nat_lock); - } else { - memset(nat, 0, sizeof(*nat)); - i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | - IPS_SEQ_ADJUST); - } - return 0; + return i->status & IPS_NAT_MASK ? 1 : 0; } static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto) @@ -505,16 +494,8 @@ static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto) struct net *net; rtnl_lock(); - /* Step 1 - remove from bysource hash */ - clean.hash = true; for_each_net(net) - nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); - synchronize_rcu(); - - /* Step 2 - clean NAT section */ - clean.hash = false; - for_each_net(net) - nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); + nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean); rtnl_unlock(); } @@ -526,16 +507,9 @@ static void nf_nat_l3proto_clean(u8 l3proto) struct net *net; rtnl_lock(); - /* Step 1 - remove from bysource hash */ - clean.hash = true; - for_each_net(net) - nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); - synchronize_rcu(); - /* Step 2 - clean NAT section */ - clean.hash = false; for_each_net(net) - nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); + nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean); rtnl_unlock(); } @@ -773,7 +747,7 @@ static void __net_exit nf_nat_net_exit(struct net *net) { struct nf_nat_proto_clean clean = {}; - nf_ct_iterate_cleanup(net, &nf_nat_proto_clean, &clean); + nf_ct_iterate_cleanup(net, &nf_nat_proto_remove, &clean); synchronize_rcu(); nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size); } From db388d6460ffa53b3b38429da6f70a913f89b048 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Thu, 11 Apr 2013 10:08:27 +0200 Subject: [PATCH 192/303] can: mcp251x: add missing IRQF_ONESHOT to request_threaded_irq Since commit: 1c6c695 genirq: Reject bogus threaded irq requests threaded irqs must provide a primary handler or set the IRQF_ONESHOT flag. Since the mcp251x driver doesn't make use of a primary handler set the IRQF_ONESHOT flag. Cc: linux-stable # >= v3.5 Reported-by: Mylene Josserand Tested-by: Mylene Josserand Signed-off-by: Marc Kleine-Budde --- drivers/net/can/mcp251x.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/mcp251x.c b/drivers/net/can/mcp251x.c index f32b9fc6a983..9aa0c64c33c8 100644 --- a/drivers/net/can/mcp251x.c +++ b/drivers/net/can/mcp251x.c @@ -929,6 +929,7 @@ static int mcp251x_open(struct net_device *net) struct mcp251x_priv *priv = netdev_priv(net); struct spi_device *spi = priv->spi; struct mcp251x_platform_data *pdata = spi->dev.platform_data; + unsigned long flags; int ret; ret = open_candev(net); @@ -945,9 +946,14 @@ static int mcp251x_open(struct net_device *net) priv->tx_skb = NULL; priv->tx_len = 0; + flags = IRQF_ONESHOT; + if (pdata->irq_flags) + flags |= pdata->irq_flags; + else + flags |= IRQF_TRIGGER_FALLING; + ret = request_threaded_irq(spi->irq, NULL, mcp251x_can_ist, - pdata->irq_flags ? pdata->irq_flags : IRQF_TRIGGER_FALLING, - DEVICE_NAME, priv); + flags, DEVICE_NAME, priv); if (ret) { dev_err(&spi->dev, "failed to acquire irq %d\n", spi->irq); if (pdata->transceiver_enable) From b02e48f2f8755e51fe8ce246988a29bd96f6b85e Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 12 Apr 2013 11:39:57 +0100 Subject: [PATCH 193/303] MAINTAINERS: Update e-mail address Update the e-mail address I use for subsystems. Signed-off-by: Mark Brown --- MAINTAINERS | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 836a6183c37f..c6108c319db9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6625,7 +6625,7 @@ S: Supported F: fs/reiserfs/ REGISTER MAP ABSTRACTION -M: Mark Brown +M: Mark Brown T: git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regmap.git S: Supported F: drivers/base/regmap/ @@ -7374,7 +7374,7 @@ F: sound/ SOUND - SOC LAYER / DYNAMIC AUDIO POWER MANAGEMENT (ASoC) M: Liam Girdwood -M: Mark Brown +M: Mark Brown T: git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git L: alsa-devel@alsa-project.org (moderated for non-subscribers) W: http://alsa-project.org/main/index.php/ASoC @@ -7463,7 +7463,7 @@ F: drivers/clk/spear/ SPI SUBSYSTEM M: Grant Likely -M: Mark Brown +M: Mark Brown L: spi-devel-general@lists.sourceforge.net Q: http://patchwork.kernel.org/project/spi-devel-general/list/ T: git git://git.secretlab.ca/git/linux-2.6.git @@ -8708,7 +8708,7 @@ F: drivers/scsi/vmw_pvscsi.h VOLTAGE AND CURRENT REGULATOR FRAMEWORK M: Liam Girdwood -M: Mark Brown +M: Mark Brown W: http://opensource.wolfsonmicro.com/node/15 W: http://www.slimlogic.co.uk/?p=48 T: git git://git.kernel.org/pub/scm/linux/kernel/git/lrg/regulator.git From 0443de5fbf224abf41f688d8487b0c307dc5a4b4 Mon Sep 17 00:00:00 2001 From: Christoph Fritz Date: Thu, 11 Apr 2013 21:32:57 +0200 Subject: [PATCH 194/303] can: sja1000: fix handling on dt properties on little endian systems To get correct endianes on little endian cpus (like arm) while reading device tree properties, this patch replaces of_get_property() with of_property_read_u32(). While there use of_property_read_bool() for the handling of the boolean "nxp,no-comparator-bypass" property. Cc: linux-stable Signed-off-by: Christoph Fritz Signed-off-by: Marc Kleine-Budde --- drivers/net/can/sja1000/sja1000_of_platform.c | 31 +++++++++---------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/drivers/net/can/sja1000/sja1000_of_platform.c b/drivers/net/can/sja1000/sja1000_of_platform.c index 6433b81256cd..8e0c4a001939 100644 --- a/drivers/net/can/sja1000/sja1000_of_platform.c +++ b/drivers/net/can/sja1000/sja1000_of_platform.c @@ -96,8 +96,8 @@ static int sja1000_ofp_probe(struct platform_device *ofdev) struct net_device *dev; struct sja1000_priv *priv; struct resource res; - const u32 *prop; - int err, irq, res_size, prop_size; + u32 prop; + int err, irq, res_size; void __iomem *base; err = of_address_to_resource(np, 0, &res); @@ -138,27 +138,27 @@ static int sja1000_ofp_probe(struct platform_device *ofdev) priv->read_reg = sja1000_ofp_read_reg; priv->write_reg = sja1000_ofp_write_reg; - prop = of_get_property(np, "nxp,external-clock-frequency", &prop_size); - if (prop && (prop_size == sizeof(u32))) - priv->can.clock.freq = *prop / 2; + err = of_property_read_u32(np, "nxp,external-clock-frequency", &prop); + if (!err) + priv->can.clock.freq = prop / 2; else priv->can.clock.freq = SJA1000_OFP_CAN_CLOCK; /* default */ - prop = of_get_property(np, "nxp,tx-output-mode", &prop_size); - if (prop && (prop_size == sizeof(u32))) - priv->ocr |= *prop & OCR_MODE_MASK; + err = of_property_read_u32(np, "nxp,tx-output-mode", &prop); + if (!err) + priv->ocr |= prop & OCR_MODE_MASK; else priv->ocr |= OCR_MODE_NORMAL; /* default */ - prop = of_get_property(np, "nxp,tx-output-config", &prop_size); - if (prop && (prop_size == sizeof(u32))) - priv->ocr |= (*prop << OCR_TX_SHIFT) & OCR_TX_MASK; + err = of_property_read_u32(np, "nxp,tx-output-config", &prop); + if (!err) + priv->ocr |= (prop << OCR_TX_SHIFT) & OCR_TX_MASK; else priv->ocr |= OCR_TX0_PULLDOWN; /* default */ - prop = of_get_property(np, "nxp,clock-out-frequency", &prop_size); - if (prop && (prop_size == sizeof(u32)) && *prop) { - u32 divider = priv->can.clock.freq * 2 / *prop; + err = of_property_read_u32(np, "nxp,clock-out-frequency", &prop); + if (!err && prop) { + u32 divider = priv->can.clock.freq * 2 / prop; if (divider > 1) priv->cdr |= divider / 2 - 1; @@ -168,8 +168,7 @@ static int sja1000_ofp_probe(struct platform_device *ofdev) priv->cdr |= CDR_CLK_OFF; /* default */ } - prop = of_get_property(np, "nxp,no-comparator-bypass", NULL); - if (!prop) + if (!of_property_read_bool(np, "nxp,no-comparator-bypass")) priv->cdr |= CDR_CBP; /* default */ priv->irq_flags = IRQF_SHARED; From f2530dc71cf0822f90bb63ea4600caaef33a66bb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 9 Apr 2013 09:33:34 +0200 Subject: [PATCH 195/303] kthread: Prevent unpark race which puts threads on the wrong cpu The smpboot threads rely on the park/unpark mechanism which binds per cpu threads on a particular core. Though the functionality is racy: CPU0 CPU1 CPU2 unpark(T) wake_up_process(T) clear(SHOULD_PARK) T runs leave parkme() due to !SHOULD_PARK bind_to(CPU2) BUG_ON(wrong CPU) We cannot let the tasks move themself to the target CPU as one of those tasks is actually the migration thread itself, which requires that it starts running on the target cpu right away. The solution to this problem is to prevent wakeups in park mode which are not from unpark(). That way we can guarantee that the association of the task to the target cpu is working correctly. Add a new task state (TASK_PARKED) which prevents other wakeups and use this state explicitly for the unpark wakeup. Peter noticed: Also, since the task state is visible to userspace and all the parked tasks are still in the PID space, its a good hint in ps and friends that these tasks aren't really there for the moment. The migration thread has another related issue. CPU0 CPU1 Bring up CPU2 create_thread(T) park(T) wait_for_completion() parkme() complete() sched_set_stop_task() schedule(TASK_PARKED) The sched_set_stop_task() call is issued while the task is on the runqueue of CPU1 and that confuses the hell out of the stop_task class on that cpu. So we need the same synchronizaion before sched_set_stop_task(). Reported-by: Dave Jones Reported-and-tested-by: Dave Hansen Reported-and-tested-by: Borislav Petkov Acked-by: Peter Ziljstra Cc: Srivatsa S. Bhat Cc: dhillf@gmail.com Cc: Ingo Molnar Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/alpine.LFD.2.02.1304091635430.21884@ionos Signed-off-by: Thomas Gleixner --- fs/proc/array.c | 1 + include/linux/sched.h | 5 ++-- include/trace/events/sched.h | 2 +- kernel/kthread.c | 52 +++++++++++++++++++----------------- kernel/smpboot.c | 14 ++++++++-- 5 files changed, 45 insertions(+), 29 deletions(-) diff --git a/fs/proc/array.c b/fs/proc/array.c index f7ed9ee46eb9..cbd0f1b324b9 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -143,6 +143,7 @@ static const char * const task_state_array[] = { "x (dead)", /* 64 */ "K (wakekill)", /* 128 */ "W (waking)", /* 256 */ + "P (parked)", /* 512 */ }; static inline const char *get_task_state(struct task_struct *tsk) diff --git a/include/linux/sched.h b/include/linux/sched.h index d35d2b6ddbfb..e692a022527b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -163,9 +163,10 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) #define TASK_DEAD 64 #define TASK_WAKEKILL 128 #define TASK_WAKING 256 -#define TASK_STATE_MAX 512 +#define TASK_PARKED 512 +#define TASK_STATE_MAX 1024 -#define TASK_STATE_TO_CHAR_STR "RSDTtZXxKW" +#define TASK_STATE_TO_CHAR_STR "RSDTtZXxKWP" extern char ___assert_task_state[1 - 2*!!( sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)]; diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 5a8671e8a67f..e5586caff67a 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -147,7 +147,7 @@ TRACE_EVENT(sched_switch, __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|", { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, { 16, "Z" }, { 32, "X" }, { 64, "x" }, - { 128, "W" }) : "R", + { 128, "K" }, { 256, "W" }, { 512, "P" }) : "R", __entry->prev_state & TASK_STATE_MAX ? "+" : "", __entry->next_comm, __entry->next_pid, __entry->next_prio) ); diff --git a/kernel/kthread.c b/kernel/kthread.c index 691dc2ef9baf..9eb7fed0bbaa 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -124,12 +124,12 @@ void *kthread_data(struct task_struct *task) static void __kthread_parkme(struct kthread *self) { - __set_current_state(TASK_INTERRUPTIBLE); + __set_current_state(TASK_PARKED); while (test_bit(KTHREAD_SHOULD_PARK, &self->flags)) { if (!test_and_set_bit(KTHREAD_IS_PARKED, &self->flags)) complete(&self->parked); schedule(); - __set_current_state(TASK_INTERRUPTIBLE); + __set_current_state(TASK_PARKED); } clear_bit(KTHREAD_IS_PARKED, &self->flags); __set_current_state(TASK_RUNNING); @@ -256,8 +256,13 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), } EXPORT_SYMBOL(kthread_create_on_node); -static void __kthread_bind(struct task_struct *p, unsigned int cpu) +static void __kthread_bind(struct task_struct *p, unsigned int cpu, long state) { + /* Must have done schedule() in kthread() before we set_task_cpu */ + if (!wait_task_inactive(p, state)) { + WARN_ON(1); + return; + } /* It's safe because the task is inactive. */ do_set_cpus_allowed(p, cpumask_of(cpu)); p->flags |= PF_THREAD_BOUND; @@ -274,12 +279,7 @@ static void __kthread_bind(struct task_struct *p, unsigned int cpu) */ void kthread_bind(struct task_struct *p, unsigned int cpu) { - /* Must have done schedule() in kthread() before we set_task_cpu */ - if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) { - WARN_ON(1); - return; - } - __kthread_bind(p, cpu); + __kthread_bind(p, cpu, TASK_UNINTERRUPTIBLE); } EXPORT_SYMBOL(kthread_bind); @@ -324,6 +324,22 @@ static struct kthread *task_get_live_kthread(struct task_struct *k) return NULL; } +static void __kthread_unpark(struct task_struct *k, struct kthread *kthread) +{ + clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags); + /* + * We clear the IS_PARKED bit here as we don't wait + * until the task has left the park code. So if we'd + * park before that happens we'd see the IS_PARKED bit + * which might be about to be cleared. + */ + if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) { + if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags)) + __kthread_bind(k, kthread->cpu, TASK_PARKED); + wake_up_state(k, TASK_PARKED); + } +} + /** * kthread_unpark - unpark a thread created by kthread_create(). * @k: thread created by kthread_create(). @@ -336,20 +352,8 @@ void kthread_unpark(struct task_struct *k) { struct kthread *kthread = task_get_live_kthread(k); - if (kthread) { - clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags); - /* - * We clear the IS_PARKED bit here as we don't wait - * until the task has left the park code. So if we'd - * park before that happens we'd see the IS_PARKED bit - * which might be about to be cleared. - */ - if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) { - if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags)) - __kthread_bind(k, kthread->cpu); - wake_up_process(k); - } - } + if (kthread) + __kthread_unpark(k, kthread); put_task_struct(k); } @@ -407,7 +411,7 @@ int kthread_stop(struct task_struct *k) trace_sched_kthread_stop(k); if (kthread) { set_bit(KTHREAD_SHOULD_STOP, &kthread->flags); - clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags); + __kthread_unpark(k, kthread); wake_up_process(k); wait_for_completion(&kthread->exited); } diff --git a/kernel/smpboot.c b/kernel/smpboot.c index 8eaed9aa9cf0..02fc5c933673 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -185,8 +185,18 @@ __smpboot_create_thread(struct smp_hotplug_thread *ht, unsigned int cpu) } get_task_struct(tsk); *per_cpu_ptr(ht->store, cpu) = tsk; - if (ht->create) - ht->create(cpu); + if (ht->create) { + /* + * Make sure that the task has actually scheduled out + * into park position, before calling the create + * callback. At least the migration thread callback + * requires that the task is off the runqueue. + */ + if (!wait_task_inactive(tsk, TASK_PARKED)) + WARN_ON(1); + else + ht->create(cpu); + } return 0; } From f09a878511997c25a76bf111a32f6b8345a701a5 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 10 Apr 2013 15:26:06 +0200 Subject: [PATCH 196/303] ath9k_hw: change AR9580 initvals to fix a stability issue The hardware parsing of Control Wrapper Frames needs to be disabled, as it has been causing spurious decryption error reports. The initvals for other chips have been updated to disable it, but AR9580 was left out for some reason. Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/ar9580_1p0_initvals.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/ar9580_1p0_initvals.h b/drivers/net/wireless/ath/ath9k/ar9580_1p0_initvals.h index 28fd99203f64..bdee2ed67219 100644 --- a/drivers/net/wireless/ath/ath9k/ar9580_1p0_initvals.h +++ b/drivers/net/wireless/ath/ath9k/ar9580_1p0_initvals.h @@ -519,7 +519,7 @@ static const u32 ar9580_1p0_mac_core[][2] = { {0x00008258, 0x00000000}, {0x0000825c, 0x40000000}, {0x00008260, 0x00080922}, - {0x00008264, 0x9bc00010}, + {0x00008264, 0x9d400010}, {0x00008268, 0xffffffff}, {0x0000826c, 0x0000ffff}, {0x00008270, 0x00000000}, From 6a76f8c0ab19f215af2a3442870eeb5f0e81998d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 11 Apr 2013 15:55:01 +0900 Subject: [PATCH 197/303] tracing: Fix possible NULL pointer dereferences Currently set_ftrace_pid and set_graph_function files use seq_lseek for their fops. However seq_open() is called only for FMODE_READ in the fops->open() so that if an user tries to seek one of those file when she open it for writing, it sees NULL seq_file and then panic. It can be easily reproduced with following command: $ cd /sys/kernel/debug/tracing $ echo 1234 | sudo tee -a set_ftrace_pid In this example, GNU coreutils' tee opens the file with fopen(, "a") and then the fopen() internally calls lseek(). Link: http://lkml.kernel.org/r/1365663302-2170-1-git-send-email-namhyung@kernel.org Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: stable@vger.kernel.org Signed-off-by: Namhyung Kim Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 2 +- kernel/trace/ftrace.c | 10 +++++----- kernel/trace/trace_stack.c | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 167abf907802..eb3ce327b975 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -396,7 +396,7 @@ ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos); ssize_t ftrace_notrace_write(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos); -loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int whence); +loff_t ftrace_filter_lseek(struct file *file, loff_t offset, int whence); int ftrace_regex_release(struct inode *inode, struct file *file); void __init diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 926ebfb74936..affc35d829cc 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2697,7 +2697,7 @@ ftrace_notrace_open(struct inode *inode, struct file *file) } loff_t -ftrace_regex_lseek(struct file *file, loff_t offset, int whence) +ftrace_filter_lseek(struct file *file, loff_t offset, int whence) { loff_t ret; @@ -3570,7 +3570,7 @@ static const struct file_operations ftrace_filter_fops = { .open = ftrace_filter_open, .read = seq_read, .write = ftrace_filter_write, - .llseek = ftrace_regex_lseek, + .llseek = ftrace_filter_lseek, .release = ftrace_regex_release, }; @@ -3578,7 +3578,7 @@ static const struct file_operations ftrace_notrace_fops = { .open = ftrace_notrace_open, .read = seq_read, .write = ftrace_notrace_write, - .llseek = ftrace_regex_lseek, + .llseek = ftrace_filter_lseek, .release = ftrace_regex_release, }; @@ -3783,8 +3783,8 @@ static const struct file_operations ftrace_graph_fops = { .open = ftrace_graph_open, .read = seq_read, .write = ftrace_graph_write, + .llseek = ftrace_filter_lseek, .release = ftrace_graph_release, - .llseek = seq_lseek, }; #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ @@ -4439,7 +4439,7 @@ static const struct file_operations ftrace_pid_fops = { .open = ftrace_pid_open, .write = ftrace_pid_write, .read = seq_read, - .llseek = seq_lseek, + .llseek = ftrace_filter_lseek, .release = ftrace_pid_release, }; diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 42ca822fc701..83a8b5b7bd35 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -322,7 +322,7 @@ static const struct file_operations stack_trace_filter_fops = { .open = stack_trace_filter_open, .read = seq_read, .write = ftrace_filter_write, - .llseek = ftrace_regex_lseek, + .llseek = ftrace_filter_lseek, .release = ftrace_regex_release, }; From f11a869d4e38397ac81f2a3d22e8d2aeb3992b0f Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Fri, 12 Apr 2013 16:49:24 +0200 Subject: [PATCH 198/303] be2net: take care of __vlan_put_tag return value The driver should use return value of __vlan_put_tag with appropriate NULL-check instead of old skb pointer. Signed-off-by: Ivan Vecera Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 08e54f3d288b..2886c9b63f90 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -759,8 +759,9 @@ static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter, if (vlan_tx_tag_present(skb)) { vlan_tag = be_get_tx_vlan_tag(adapter, skb); - __vlan_put_tag(skb, vlan_tag); - skb->vlan_tci = 0; + skb = __vlan_put_tag(skb, vlan_tag); + if (skb) + skb->vlan_tci = 0; } return skb; From 3be8fbab18fbc06b6ff94a56f9c225e29ea64a73 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 12 Apr 2013 03:17:12 +0000 Subject: [PATCH 199/303] tuntap: fix error return code in tun_set_iff() Fix to return a negative error code from the error handling case instead of 0, as returned elsewhere in this function. [ Bug added in linux-3.8 , commit 4008e97f866db665 ("tuntap: fix ambigious multiqueue API") ] Signed-off-by: Wei Yongjun Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/tun.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index b7c457adc0dc..729ed533bb33 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1594,7 +1594,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) if (tun->flags & TUN_TAP_MQ && (tun->numqueues + tun->numdisabled > 1)) - return err; + return -EBUSY; } else { char *name; From fb745e9a037895321781d066aa24757ceabf9df9 Mon Sep 17 00:00:00 2001 From: David Ward Date: Thu, 11 Apr 2013 13:47:15 +0000 Subject: [PATCH 200/303] net/802/mrp: fix possible race condition when calling mrp_pdu_queue() (Adapted from a very similar change to net/802/garp.c by Cong Wang.) mrp_pdu_queue() should ways be called with the applicant spin lock. mrp_uninit_applicant() only holds the rtnl lock which is not enough; a race is possible because mrp_rcv() is called in BH context: mrp_rcv() |->mrp_pdu_parse_msg() |->mrp_pdu_parse_vecattr() |->mrp_pdu_parse_vecattr_event() |-> mrp_attr_event() |-> mrp_pdu_append_vecattr_event() |-> mrp_pdu_queue() Cc: Cong Wang Cc: Eric Dumazet Signed-off-by: David Ward Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/802/mrp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/802/mrp.c b/net/802/mrp.c index a4cc3229952a..e085bcc754f6 100644 --- a/net/802/mrp.c +++ b/net/802/mrp.c @@ -870,8 +870,12 @@ void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl) * all pending messages before the applicant is gone. */ del_timer_sync(&app->join_timer); + + spin_lock(&app->lock); mrp_mad_event(app, MRP_EVENT_TX); mrp_pdu_queue(app); + spin_unlock(&app->lock); + mrp_queue_xmit(app); dev_mc_del(dev, appl->group_address); From 7f49ef69db6bbf756c0abca7e9b65b32e999eec8 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 12 Apr 2013 16:40:13 -0400 Subject: [PATCH 201/303] ftrace: Move ftrace_filter_lseek out of CONFIG_DYNAMIC_FTRACE section As ftrace_filter_lseek is now used with ftrace_pid_fops, it needs to be moved out of the #ifdef CONFIG_DYNAMIC_FTRACE section as the ftrace_pid_fops is defined when DYNAMIC_FTRACE is not. Cc: stable@vger.kernel.org Cc: Namhyung Kim Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 3 ++- kernel/trace/ftrace.c | 28 ++++++++++++++-------------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index eb3ce327b975..52da2a250795 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -396,7 +396,6 @@ ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos); ssize_t ftrace_notrace_write(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos); -loff_t ftrace_filter_lseek(struct file *file, loff_t offset, int whence); int ftrace_regex_release(struct inode *inode, struct file *file); void __init @@ -569,6 +568,8 @@ static inline int ftrace_regex_release(struct inode *inode, struct file *file) { return -ENODEV; } #endif /* CONFIG_DYNAMIC_FTRACE */ +loff_t ftrace_filter_lseek(struct file *file, loff_t offset, int whence); + /* totally disable ftrace - can not re-enable after this */ void ftrace_kill(void); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index affc35d829cc..2461ede45a8d 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1052,6 +1052,19 @@ static __init void ftrace_profile_debugfs(struct dentry *d_tracer) static struct pid * const ftrace_swapper_pid = &init_struct_pid; +loff_t +ftrace_filter_lseek(struct file *file, loff_t offset, int whence) +{ + loff_t ret; + + if (file->f_mode & FMODE_READ) + ret = seq_lseek(file, offset, whence); + else + file->f_pos = ret = 1; + + return ret; +} + #ifdef CONFIG_DYNAMIC_FTRACE #ifndef CONFIG_FTRACE_MCOUNT_RECORD @@ -2612,7 +2625,7 @@ static void ftrace_filter_reset(struct ftrace_hash *hash) * routine, you can use ftrace_filter_write() for the write * routine if @flag has FTRACE_ITER_FILTER set, or * ftrace_notrace_write() if @flag has FTRACE_ITER_NOTRACE set. - * ftrace_regex_lseek() should be used as the lseek routine, and + * ftrace_filter_lseek() should be used as the lseek routine, and * release must call ftrace_regex_release(). */ int @@ -2696,19 +2709,6 @@ ftrace_notrace_open(struct inode *inode, struct file *file) inode, file); } -loff_t -ftrace_filter_lseek(struct file *file, loff_t offset, int whence) -{ - loff_t ret; - - if (file->f_mode & FMODE_READ) - ret = seq_lseek(file, offset, whence); - else - file->f_pos = ret = 1; - - return ret; -} - static int ftrace_match(char *str, char *regex, int len, int type) { int matched = 0; From 1de14c3c5cbc9bb17e9dcc648cda51c0c85d54b9 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 12 Apr 2013 16:23:54 -0700 Subject: [PATCH 202/303] x86-32: Fix possible incomplete TLB invalidate with PAE pagetables This patch attempts to fix: https://bugzilla.kernel.org/show_bug.cgi?id=56461 The symptom is a crash and messages like this: chrome: Corrupted page table at address 34a03000 *pdpt = 0000000000000000 *pde = 0000000000000000 Bad pagetable: 000f [#1] PREEMPT SMP Ingo guesses this got introduced by commit 611ae8e3f520 ("x86/tlb: enable tlb flush range support for x86") since that code started to free unused pagetables. On x86-32 PAE kernels, that new code has the potential to free an entire PMD page and will clear one of the four page-directory-pointer-table (aka pgd_t entries). The hardware aggressively "caches" these top-level entries and invlpg does not actually affect the CPU's copy. If we clear one we *HAVE* to do a full TLB flush, otherwise we might continue using a freed pmd page. (note, we do this properly on the population side in pud_populate()). This patch tracks whenever we clear one of these entries in the 'struct mmu_gather', and ensures that we follow up with a full tlb flush. BTW, I disassembled and checked that: if (tlb->fullmm == 0) and if (!tlb->fullmm && !tlb->need_flush_all) generate essentially the same code, so there should be zero impact there to the !PAE case. Signed-off-by: Dave Hansen Cc: Peter Anvin Cc: Ingo Molnar Cc: Artem S Tashkinov Signed-off-by: Linus Torvalds --- arch/x86/include/asm/tlb.h | 2 +- arch/x86/mm/pgtable.c | 7 +++++++ include/asm-generic/tlb.h | 7 ++++++- mm/memory.c | 1 + 4 files changed, 15 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h index 4fef20773b8f..c7797307fc2b 100644 --- a/arch/x86/include/asm/tlb.h +++ b/arch/x86/include/asm/tlb.h @@ -7,7 +7,7 @@ #define tlb_flush(tlb) \ { \ - if (tlb->fullmm == 0) \ + if (!tlb->fullmm && !tlb->need_flush_all) \ flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end, 0UL); \ else \ flush_tlb_mm_range(tlb->mm, 0UL, TLB_FLUSH_ALL, 0UL); \ diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 193350b51f90..17fda6a8b3c2 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -58,6 +58,13 @@ void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte) void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) { paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT); + /* + * NOTE! For PAE, any changes to the top page-directory-pointer-table + * entries need a full cr3 reload to flush. + */ +#ifdef CONFIG_X86_PAE + tlb->need_flush_all = 1; +#endif tlb_remove_page(tlb, virt_to_page(pmd)); } diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 25f01d0bc149..b1b1fa6ffffe 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -99,7 +99,12 @@ struct mmu_gather { unsigned int need_flush : 1, /* Did free PTEs */ fast_mode : 1; /* No batching */ - unsigned int fullmm; + /* we are in the middle of an operation to clear + * a full mm and can make some optimizations */ + unsigned int fullmm : 1, + /* we have performed an operation which + * requires a complete flush of the tlb */ + need_flush_all : 1; struct mmu_gather_batch *active; struct mmu_gather_batch local; diff --git a/mm/memory.c b/mm/memory.c index 494526ae024a..13cbc420fead 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -216,6 +216,7 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm) tlb->mm = mm; tlb->fullmm = fullmm; + tlb->need_flush_all = 0; tlb->start = -1UL; tlb->end = 0; tlb->need_flush = 0; From 4bc4bee4595662d8bff92180d5c32e3313a704b0 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 5 Apr 2013 20:50:09 +0000 Subject: [PATCH 203/303] Btrfs: make sure nbytes are right after log replay While trying to track down a tree log replay bug I noticed that fsck was always complaining about nbytes not being right for our fsynced file. That is because the new fsync stuff doesn't wait for ordered extents to complete, so the inodes nbytes are not necessarily updated properly when we log it. So to fix this we need to set nbytes to whatever it is on the inode that is on disk, so when we replay the extents we can just add the bytes that are being added as we replay the extent. This makes it work for the case that we have the wrong nbytes or the case that we logged everything and nbytes is actually correct. With this I'm no longer getting nbytes errors out of btrfsck. Cc: stable@vger.kernel.org Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/tree-log.c | 48 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 42 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 451fad96ecd1..ef96381569a4 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -317,6 +317,7 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans, unsigned long src_ptr; unsigned long dst_ptr; int overwrite_root = 0; + bool inode_item = key->type == BTRFS_INODE_ITEM_KEY; if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) overwrite_root = 1; @@ -326,6 +327,9 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans, /* look for the key in the destination tree */ ret = btrfs_search_slot(NULL, root, key, path, 0, 0); + if (ret < 0) + return ret; + if (ret == 0) { char *src_copy; char *dst_copy; @@ -367,6 +371,30 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans, return 0; } + /* + * We need to load the old nbytes into the inode so when we + * replay the extents we've logged we get the right nbytes. + */ + if (inode_item) { + struct btrfs_inode_item *item; + u64 nbytes; + + item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_item); + nbytes = btrfs_inode_nbytes(path->nodes[0], item); + item = btrfs_item_ptr(eb, slot, + struct btrfs_inode_item); + btrfs_set_inode_nbytes(eb, item, nbytes); + } + } else if (inode_item) { + struct btrfs_inode_item *item; + + /* + * New inode, set nbytes to 0 so that the nbytes comes out + * properly when we replay the extents. + */ + item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item); + btrfs_set_inode_nbytes(eb, item, 0); } insert: btrfs_release_path(path); @@ -486,7 +514,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, int found_type; u64 extent_end; u64 start = key->offset; - u64 saved_nbytes; + u64 nbytes = 0; struct btrfs_file_extent_item *item; struct inode *inode = NULL; unsigned long size; @@ -496,10 +524,19 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, found_type = btrfs_file_extent_type(eb, item); if (found_type == BTRFS_FILE_EXTENT_REG || - found_type == BTRFS_FILE_EXTENT_PREALLOC) - extent_end = start + btrfs_file_extent_num_bytes(eb, item); - else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + found_type == BTRFS_FILE_EXTENT_PREALLOC) { + nbytes = btrfs_file_extent_num_bytes(eb, item); + extent_end = start + nbytes; + + /* + * We don't add to the inodes nbytes if we are prealloc or a + * hole. + */ + if (btrfs_file_extent_disk_bytenr(eb, item) == 0) + nbytes = 0; + } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { size = btrfs_file_extent_inline_len(eb, item); + nbytes = btrfs_file_extent_ram_bytes(eb, item); extent_end = ALIGN(start + size, root->sectorsize); } else { ret = 0; @@ -548,7 +585,6 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, } btrfs_release_path(path); - saved_nbytes = inode_get_bytes(inode); /* drop any overlapping extents */ ret = btrfs_drop_extents(trans, root, inode, start, extent_end, 1); BUG_ON(ret); @@ -635,7 +671,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, BUG_ON(ret); } - inode_set_bytes(inode, saved_nbytes); + inode_add_bytes(inode, nbytes); ret = btrfs_update_inode(trans, root, inode); out: if (inode) From 2e0cbf2cc2c9371f0aa198857d799175ffe231a6 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Sat, 13 Apr 2013 06:18:56 +0000 Subject: [PATCH 204/303] net: mvmdio: add select PHYLIB The mvmdio driver uses the phylib API, so it should select the PHYLIB symbol, otherwise, a build with mvmdio (but without mvneta) fails to build with undefined symbols such as mdiobus_unregister, mdiobus_free, etc. The mvneta driver does not use the phylib API directly, so it does not need to select PHYLIB. It already selects the mvmdio driver anyway. Historically, this problem is due to the fact that the PHY handling was originally part of mvneta, and was later moved to a separate driver, without updating the Kconfig select statements accordingly. And since there was no functional reason to use mvmdio without mvneta, this case was not tested. Signed-off-by: Thomas Petazzoni Reported-by: Fengguang Wu Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/Kconfig b/drivers/net/ethernet/marvell/Kconfig index edfba9370922..434e33c527df 100644 --- a/drivers/net/ethernet/marvell/Kconfig +++ b/drivers/net/ethernet/marvell/Kconfig @@ -33,6 +33,7 @@ config MV643XX_ETH config MVMDIO tristate "Marvell MDIO interface support" + select PHYLIB ---help--- This driver supports the MDIO interface found in the network interface units of the Marvell EBU SoCs (Kirkwood, Orion5x, @@ -45,7 +46,6 @@ config MVMDIO config MVNETA tristate "Marvell Armada 370/XP network interface support" depends on MACH_ARMADA_370_XP - select PHYLIB select MVMDIO ---help--- This driver supports the network interface units in the From a49b7e82cab0f9b41f483359be83f44fbb6b4979 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 13 Apr 2013 15:15:30 -0700 Subject: [PATCH 205/303] kobject: fix kset_find_obj() race with concurrent last kobject_put() Anatol Pomozov identified a race condition that hits module unloading and re-loading. To quote Anatol: "This is a race codition that exists between kset_find_obj() and kobject_put(). kset_find_obj() might return kobject that has refcount equal to 0 if this kobject is freeing by kobject_put() in other thread. Here is timeline for the crash in case if kset_find_obj() searches for an object tht nobody holds and other thread is doing kobject_put() on the same kobject: THREAD A (calls kset_find_obj()) THREAD B (calls kobject_put()) splin_lock() atomic_dec_return(kobj->kref), counter gets zero here ... starts kobject cleanup .... spin_lock() // WAIT thread A in kobj_kset_leave() iterate over kset->list atomic_inc(kobj->kref) (counter becomes 1) spin_unlock() spin_lock() // taken // it does not know that thread A increased counter so it remove obj from list spin_unlock() vfree(module) // frees module object with containing kobj // kobj points to freed memory area!! kobject_put(kobj) // OOPS!!!! The race above happens because module.c tries to use kset_find_obj() when somebody unloads module. The module.c code was introduced in commit 6494a93d55fa" Anatol supplied a patch specific for module.c that worked around the problem by simply not using kset_find_obj() at all, but rather than make a local band-aid, this just fixes kset_find_obj() to be thread-safe using the proper model of refusing the get a new reference if the refcount has already dropped to zero. See examples of this proper refcount handling not only in the kref documentation, but in various other equivalent uses of this pattern by grepping for atomic_inc_not_zero(). [ Side note: the module race does indicate that module loading and unloading is not properly serialized wrt sysfs information using the module mutex. That may require further thought, but this is the correct fix at the kobject layer regardless. ] Reported-analyzed-and-tested-by: Anatol Pomozov Cc: Greg Kroah-Hartman Cc: Al Viro Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- lib/kobject.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/kobject.c b/lib/kobject.c index e07ee1fcd6f1..a65486613d79 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -529,6 +529,13 @@ struct kobject *kobject_get(struct kobject *kobj) return kobj; } +static struct kobject *kobject_get_unless_zero(struct kobject *kobj) +{ + if (!kref_get_unless_zero(&kobj->kref)) + kobj = NULL; + return kobj; +} + /* * kobject_cleanup - free kobject resources. * @kobj: object to cleanup @@ -751,7 +758,7 @@ struct kobject *kset_find_obj(struct kset *kset, const char *name) list_for_each_entry(k, &kset->list, entry) { if (kobject_name(k) && !strcmp(kobject_name(k), name)) { - ret = kobject_get(k); + ret = kobject_get_unless_zero(k); break; } } From 5b55d708335a9e3e4f61f2dadf7511502205ccd1 Mon Sep 17 00:00:00 2001 From: Suleiman Souhlal Date: Sat, 13 Apr 2013 16:03:06 -0700 Subject: [PATCH 206/303] vfs: Revert spurious fix to spinning prevention in prune_icache_sb Revert commit 62a3ddef6181 ("vfs: fix spinning prevention in prune_icache_sb"). This commit doesn't look right: since we are looking at the tail of the list (sb->s_inode_lru.prev) if we want to skip an inode, we should put it back at the head of the list instead of the tail, otherwise we will keep spinning on it. Discovered when investigating why prune_icache_sb came top in perf reports of a swapping load. Signed-off-by: Suleiman Souhlal Signed-off-by: Hugh Dickins Cc: stable@vger.kernel.org # v3.2+ Signed-off-by: Linus Torvalds --- fs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/inode.c b/fs/inode.c index f5f7c06c36fb..a898b3d43ccf 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -725,7 +725,7 @@ void prune_icache_sb(struct super_block *sb, int nr_to_scan) * inode to the back of the list so we don't spin on it. */ if (!spin_trylock(&inode->i_lock)) { - list_move_tail(&inode->i_lru, &sb->s_inode_lru); + list_move(&inode->i_lru, &sb->s_inode_lru); continue; } From 09549cd01726a7ff8b102a93e46b059531583ab6 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Wed, 10 Apr 2013 14:36:22 +0200 Subject: [PATCH 207/303] watchdog: Revert the AT91RM9200_WATCHDOG dependency Compiling the at91rm9200_wdt.c driver without at91rm9200 support was leading to several errors: drivers/built-in.o: In function `at91_wdt_close': at91_adc.c:(.text+0xc9fe4): undefined reference to `at91_st_base' drivers/built-in.o: In function `at91_wdt_write': at91_adc.c:(.text+0xca004): undefined reference to `at91_st_base' drivers/built-in.o: In function `at91wdt_shutdown': at91_adc.c:(.text+0xca01c): undefined reference to `at91_st_base' drivers/built-in.o: In function `at91wdt_suspend': at91_adc.c:(.text+0xca038): undefined reference to `at91_st_base' drivers/built-in.o: In function `at91_wdt_open': at91_adc.c:(.text+0xca0cc): undefined reference to `at91_st_base' drivers/built-in.o:at91_adc.c:(.text+0xca2c8): more undefined references to `at91_st_base' follow So, reverting the modification of the "depends" Kconfig line introduced by patch a6a1bcd37 (watchdog: at91rm9200: add DT support) seems to be the good solution. Signed-off-by: Nicolas Ferre Acked-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 9fcc70c11cea..e89fc3133972 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -117,7 +117,7 @@ config ARM_SP805_WATCHDOG config AT91RM9200_WATCHDOG tristate "AT91RM9200 watchdog" - depends on ARCH_AT91 + depends on ARCH_AT91RM9200 help Watchdog timer embedded into AT91RM9200 chips. This will reboot your system when the timeout is reached. From 935d8aabd4331f47a89c3e1daa5779d23cf244ee Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 14 Apr 2013 10:06:31 -0700 Subject: [PATCH 208/303] Add file_ns_capable() helper function for open-time capability checking Nothing is using it yet, but this will allow us to delay the open-time checks to use time, without breaking the normal UNIX permission semantics where permissions are determined by the opener (and the file descriptor can then be passed to a different process, or the process can drop capabilities). Signed-off-by: Linus Torvalds --- include/linux/capability.h | 2 ++ kernel/capability.c | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/include/linux/capability.h b/include/linux/capability.h index 98503b792369..d9a4f7f40f32 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -35,6 +35,7 @@ struct cpu_vfs_cap_data { #define _KERNEL_CAP_T_SIZE (sizeof(kernel_cap_t)) +struct file; struct inode; struct dentry; struct user_namespace; @@ -211,6 +212,7 @@ extern bool capable(int cap); extern bool ns_capable(struct user_namespace *ns, int cap); extern bool nsown_capable(int cap); extern bool inode_capable(const struct inode *inode, int cap); +extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap); /* audit system wants to get cap info from files as well */ extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps); diff --git a/kernel/capability.c b/kernel/capability.c index 493d97259484..f6c2ce5701e1 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -392,6 +392,30 @@ bool ns_capable(struct user_namespace *ns, int cap) } EXPORT_SYMBOL(ns_capable); +/** + * file_ns_capable - Determine if the file's opener had a capability in effect + * @file: The file we want to check + * @ns: The usernamespace we want the capability in + * @cap: The capability to be tested for + * + * Return true if task that opened the file had a capability in effect + * when the file was opened. + * + * This does not set PF_SUPERPRIV because the caller may not + * actually be privileged. + */ +bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap) +{ + if (WARN_ON_ONCE(!cap_valid(cap))) + return false; + + if (security_capable(file->f_cred, ns, cap) == 0) + return true; + + return false; +} +EXPORT_SYMBOL(file_ns_capable); + /** * capable - Determine if the current task has a superior capability in effect * @cap: The capability to be tested for From f88c91ddba958e9a5dd4a5ee8c52a0faa790f586 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 14 Apr 2013 23:18:43 +0800 Subject: [PATCH 209/303] ipv6: statically link register_inet6addr_notifier() Tomas reported the following build error: net/built-in.o: In function `ieee80211_unregister_hw': (.text+0x10f0e1): undefined reference to `unregister_inet6addr_notifier' net/built-in.o: In function `ieee80211_register_hw': (.text+0x10f610): undefined reference to `register_inet6addr_notifier' make: *** [vmlinux] Error 1 when built IPv6 as a module. So we have to statically link these symbols. Reported-by: Tomas Melin Cc: Tomas Melin Cc: "David S. Miller" Cc: YOSHIFUJI Hidaki Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/addrconf.h | 1 + net/ipv6/addrconf.c | 24 +++--------------------- net/ipv6/addrconf_core.c | 19 +++++++++++++++++++ 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 40be2a0d8ae1..84a6440f1f19 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -199,6 +199,7 @@ extern bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, /* Device notifier */ extern int register_inet6addr_notifier(struct notifier_block *nb); extern int unregister_inet6addr_notifier(struct notifier_block *nb); +extern int inet6addr_notifier_call_chain(unsigned long val, void *v); extern void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex, struct ipv6_devconf *devconf); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a459c4f5b769..dae802c0af7c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -168,8 +168,6 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev, static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, struct net_device *dev); -static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); - static struct ipv6_devconf ipv6_devconf __read_mostly = { .forwarding = 0, .hop_limit = IPV6_DEFAULT_HOPLIMIT, @@ -837,7 +835,7 @@ out2: rcu_read_unlock_bh(); if (likely(err == 0)) - atomic_notifier_call_chain(&inet6addr_chain, NETDEV_UP, ifa); + inet6addr_notifier_call_chain(NETDEV_UP, ifa); else { kfree(ifa); ifa = ERR_PTR(err); @@ -927,7 +925,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) ipv6_ifa_notify(RTM_DELADDR, ifp); - atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifp); + inet6addr_notifier_call_chain(NETDEV_DOWN, ifp); /* * Purge or update corresponding prefix @@ -2988,7 +2986,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) if (state != INET6_IFADDR_STATE_DEAD) { __ipv6_ifa_notify(RTM_DELADDR, ifa); - atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa); + inet6addr_notifier_call_chain(NETDEV_DOWN, ifa); } in6_ifa_put(ifa); @@ -4869,22 +4867,6 @@ static struct pernet_operations addrconf_ops = { .exit = addrconf_exit_net, }; -/* - * Device notifier - */ - -int register_inet6addr_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_register(&inet6addr_chain, nb); -} -EXPORT_SYMBOL(register_inet6addr_notifier); - -int unregister_inet6addr_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_unregister(&inet6addr_chain, nb); -} -EXPORT_SYMBOL(unregister_inet6addr_notifier); - static struct rtnl_af_ops inet6_ops = { .family = AF_INET6, .fill_link_af = inet6_fill_link_af, diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index d051e5f4bf34..72104562c864 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -78,3 +78,22 @@ int __ipv6_addr_type(const struct in6_addr *addr) } EXPORT_SYMBOL(__ipv6_addr_type); +static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); + +int register_inet6addr_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_register(&inet6addr_chain, nb); +} +EXPORT_SYMBOL(register_inet6addr_notifier); + +int unregister_inet6addr_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&inet6addr_chain, nb); +} +EXPORT_SYMBOL(unregister_inet6addr_notifier); + +int inet6addr_notifier_call_chain(unsigned long val, void *v) +{ + return atomic_notifier_call_chain(&inet6addr_chain, val, v); +} +EXPORT_SYMBOL(inet6addr_notifier_call_chain); From 41ef2d5678d83af030125550329b6ae8b74618fa Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 14 Apr 2013 17:45:16 -0700 Subject: [PATCH 210/303] Linux 3.9-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6db672b15bda..9cf6783c2ec3 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 9 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Unicycling Gorilla # *DOCUMENTATION* From 6708075f104c3c9b04b23336bb0366ca30c3931b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 14 Apr 2013 13:47:02 -0700 Subject: [PATCH 211/303] userns: Don't let unprivileged users trick privileged users into setting the id_map When we require privilege for setting /proc//uid_map or /proc//gid_map no longer allow an unprivileged user to open the file and pass it to a privileged program to write to the file. Instead when privilege is required require both the opener and the writer to have the necessary capabilities. I have tested this code and verified that setting /proc//uid_map fails when an unprivileged user opens the file and a privielged user attempts to set the mapping, that unprivileged users can still map their own id, and that a privileged users can still setup an arbitrary mapping. Reported-by: Andy Lutomirski Signed-off-by: "Eric W. Biederman" Signed-off-by: Andy Lutomirski --- kernel/user_namespace.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index a54f26f82eb2..e2d4ace4481b 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -25,7 +25,8 @@ static struct kmem_cache *user_ns_cachep __read_mostly; -static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, +static bool new_idmap_permitted(const struct file *file, + struct user_namespace *ns, int cap_setid, struct uid_gid_map *map); static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns) @@ -700,7 +701,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, ret = -EPERM; /* Validate the user is allowed to use user id's mapped to. */ - if (!new_idmap_permitted(ns, cap_setid, &new_map)) + if (!new_idmap_permitted(file, ns, cap_setid, &new_map)) goto out; /* Map the lower ids from the parent user namespace to the @@ -787,7 +788,8 @@ ssize_t proc_projid_map_write(struct file *file, const char __user *buf, size_t &ns->projid_map, &ns->parent->projid_map); } -static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, +static bool new_idmap_permitted(const struct file *file, + struct user_namespace *ns, int cap_setid, struct uid_gid_map *new_map) { /* Allow mapping to your own filesystem ids */ @@ -811,8 +813,10 @@ static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, /* Allow the specified ids if we have the appropriate capability * (CAP_SETUID or CAP_SETGID) over the parent user namespace. + * And the opener of the id file also had the approprpiate capability. */ - if (ns_capable(ns->parent, cap_setid)) + if (ns_capable(ns->parent, cap_setid) && + file_ns_capable(file, ns->parent, cap_setid)) return true; return false; From e3211c120a85b792978bcb4be7b2886df18d27f0 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 14 Apr 2013 16:28:19 -0700 Subject: [PATCH 212/303] userns: Check uid_map's opener's fsuid, not the current fsuid Signed-off-by: Andy Lutomirski --- kernel/user_namespace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index e2d4ace4481b..5c16f3aa757a 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -797,12 +797,12 @@ static bool new_idmap_permitted(const struct file *file, u32 id = new_map->extent[0].lower_first; if (cap_setid == CAP_SETUID) { kuid_t uid = make_kuid(ns->parent, id); - if (uid_eq(uid, current_fsuid())) + if (uid_eq(uid, file->f_cred->fsuid)) return true; } else if (cap_setid == CAP_SETGID) { kgid_t gid = make_kgid(ns->parent, id); - if (gid_eq(gid, current_fsgid())) + if (gid_eq(gid, file->f_cred->fsgid)) return true; } } From 41c21e351e79004dbb4efa4bc14a53a7e0af38c5 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 14 Apr 2013 11:44:04 -0700 Subject: [PATCH 213/303] userns: Changing any namespace id mappings should require privileges Changing uid/gid/projid mappings doesn't change your id within the namespace; it reconfigures the namespace. Unprivileged programs should *not* be able to write these files. (We're also checking the privileges on the wrong task.) Given the write-once nature of these files and the other security checks, this is likely impossible to usefully exploit. Signed-off-by: Andy Lutomirski --- kernel/user_namespace.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 5c16f3aa757a..e134d8f365dd 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -613,10 +613,10 @@ static ssize_t map_write(struct file *file, const char __user *buf, if (map->nr_extents != 0) goto out; - /* Require the appropriate privilege CAP_SETUID or CAP_SETGID - * over the user namespace in order to set the id mapping. + /* + * Adjusting namespace settings requires capabilities on the target. */ - if (cap_valid(cap_setid) && !ns_capable(ns, cap_setid)) + if (cap_valid(cap_setid) && !file_ns_capable(file, ns, CAP_SYS_ADMIN)) goto out; /* Get a buffer */ From 05e38e5d5d437c762c79428ae8434632a8ca2c5e Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Mon, 15 Apr 2013 11:44:14 +1000 Subject: [PATCH 214/303] powerpc: Fix audit crash due to save/restore PPR changes The current mainline crashes when hitting userspace with the following: kernel BUG at kernel/auditsc.c:1769! cpu 0x1: Vector: 700 (Program Check) at [c000000023883a60] pc: c0000000001047a8: .__audit_syscall_entry+0x38/0x130 lr: c00000000000ed64: .do_syscall_trace_enter+0xc4/0x270 sp: c000000023883ce0 msr: 8000000000029032 current = 0xc000000023800000 paca = 0xc00000000f080380 softe: 0 irq_happened: 0x01 pid = 1629, comm = start_udev kernel BUG at kernel/auditsc.c:1769! enter ? for help [c000000023883d80] c00000000000ed64 .do_syscall_trace_enter+0xc4/0x270 [c000000023883e30] c000000000009b08 syscall_dotrace+0xc/0x38 --- Exception: c00 (System Call) at 0000008010ec50dc Bisecting found the following patch caused it: commit 44e9309f1f357794b7ae93d5f3e3e6f11d2b8a7f Author: Haren Myneni powerpc: Implement PPR save/restore It was found this patch corrupted r9 when calling SET_DEFAULT_THREAD_PPR() Using r10 as a scratch register instead of r9 solved the problem. Signed-off-by: Alistair Popple Acked-by: Michael Neuling Signed-off-by: Stephen Rothwell --- arch/powerpc/kernel/entry_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 256c5bf0adb7..3acb1a076d3f 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -304,7 +304,7 @@ syscall_exit_work: subi r12,r12,TI_FLAGS 4: /* Anything else left to do? */ - SET_DEFAULT_THREAD_PPR(r3, r9) /* Set thread.ppr = 3 */ + SET_DEFAULT_THREAD_PPR(r3, r10) /* Set thread.ppr = 3 */ andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP) beq .ret_from_except_lite From d8b92292408831d86ff7b781e66bf79301934b99 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Tue, 9 Apr 2013 22:31:24 +0000 Subject: [PATCH 215/303] powerpc: add a missing label in resume_kernel A label 0 was missed in the patch a9c4e541 (powerpc/kprobe: Complete kprobe and migrate exception frame). This will cause the kernel branch to an undetermined address if there really has a conflict when updating the thread flags. Signed-off-by: Kevin Hao Cc: stable@vger.kernel.org Acked-By: Tiejun Chen Signed-off-by: Stephen Rothwell --- arch/powerpc/kernel/entry_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 3acb1a076d3f..04d69c4a5ac2 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -657,7 +657,7 @@ resume_kernel: /* Clear _TIF_EMULATE_STACK_STORE flag */ lis r11,_TIF_EMULATE_STACK_STORE@h addi r5,r9,TI_FLAGS - ldarx r4,0,r5 +0: ldarx r4,0,r5 andc r4,r4,r11 stdcx. r4,0,r5 bne- 0b From 8176cced706b5e5d15887584150764894e94e02f Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Sat, 13 Apr 2013 22:49:14 +0300 Subject: [PATCH 216/303] perf: Treat attr.config as u64 in perf_swevent_init() Trinity discovered that we fail to check all 64 bits of attr.config passed by user space, resulting to out-of-bounds access of the perf_swevent_enabled array in sw_perf_event_destroy(). Introduced in commit b0a873ebb ("perf: Register PMU implementations"). Signed-off-by: Tommi Rantala Cc: Peter Zijlstra Cc: davej@redhat.com Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Link: http://lkml.kernel.org/r/1365882554-30259-1-git-send-email-tt.rantala@gmail.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 7e0962ed7f8a..4d3124b39277 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5331,7 +5331,7 @@ static void sw_perf_event_destroy(struct perf_event *event) static int perf_swevent_init(struct perf_event *event) { - int event_id = event->attr.config; + u64 event_id = event->attr.config; if (event->attr.type != PERF_TYPE_SOFTWARE) return -ENOENT; From 202dec868a769af726e262790bf00b09ec929b52 Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Mon, 15 Apr 2013 09:10:34 -0700 Subject: [PATCH 217/303] Input: wacom - correct reported resolution for Intuos4 Wireless Reported-by: Przemo Firszt Signed-off-by: Jason Gerecke Tested-by: Przemo Firszt Signed-off-by: Dmitry Torokhov --- drivers/input/tablet/wacom_wac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/tablet/wacom_wac.c b/drivers/input/tablet/wacom_wac.c index ed78be901b65..49fdff02b5fe 100644 --- a/drivers/input/tablet/wacom_wac.c +++ b/drivers/input/tablet/wacom_wac.c @@ -1912,7 +1912,7 @@ static const struct wacom_features wacom_features_0xBB = { "Wacom Intuos4 12x19", WACOM_PKGLEN_INTUOS, 97536, 60960, 2047, 63, INTUOS4L, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES }; static const struct wacom_features wacom_features_0xBC = - { "Wacom Intuos4 WL", WACOM_PKGLEN_INTUOS, 40840, 25400, 2047, + { "Wacom Intuos4 WL", WACOM_PKGLEN_INTUOS, 40640, 25400, 2047, 63, INTUOS4, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES }; static const struct wacom_features wacom_features_0x26 = { "Wacom Intuos5 touch S", WACOM_PKGLEN_INTUOS, 31496, 19685, 2047, From 8f3359bdc83f1abb1989e0817ab0e0b18667c828 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Sat, 13 Apr 2013 14:06:07 +0000 Subject: [PATCH 218/303] bridge: make user modified path cost sticky Keep a STP port path cost value if it was set by a user. Don't replace it with the link-speed based path cost whenever the link goes down and comes back up. Reported-by: Roopa Prabhu Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_if.c | 3 ++- net/bridge/br_private.h | 1 + net/bridge/br_stp_if.c | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index ef1b91431c6b..459dab22b3f6 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -67,7 +67,8 @@ void br_port_carrier_check(struct net_bridge_port *p) struct net_device *dev = p->dev; struct net_bridge *br = p->br; - if (netif_running(dev) && netif_oper_up(dev)) + if (!(p->flags & BR_ADMIN_COST) && + netif_running(dev) && netif_oper_up(dev)) p->path_cost = port_cost(dev); if (!netif_running(br->dev)) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 3cbf5beb3d4b..d2c043a857b6 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -156,6 +156,7 @@ struct net_bridge_port #define BR_BPDU_GUARD 0x00000002 #define BR_ROOT_BLOCK 0x00000004 #define BR_MULTICAST_FAST_LEAVE 0x00000008 +#define BR_ADMIN_COST 0x00000010 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING u32 multicast_startup_queries_sent; diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 0bdb4ebd362b..d45e760141bb 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -288,6 +288,7 @@ int br_stp_set_path_cost(struct net_bridge_port *p, unsigned long path_cost) path_cost > BR_MAX_PATH_COST) return -ERANGE; + p->flags |= BR_ADMIN_COST; p->path_cost = path_cost; br_configuration_update(p->br); br_port_state_selection(p->br); From 06848c10f720cbc20e3b784c0df24930b7304b93 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 13 Apr 2013 15:49:03 +0000 Subject: [PATCH 219/303] esp4: fix error return code in esp_output() Fix to return a negative error code from the error handling case instead of 0, as returned elsewhere in this function. Signed-off-by: Wei Yongjun Acked-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/esp4.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 3b4f0cd2e63e..4cfe34d4cc96 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -139,8 +139,6 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) /* skb is pure payload to encrypt */ - err = -ENOMEM; - esp = x->data; aead = esp->aead; alen = crypto_aead_authsize(aead); @@ -176,8 +174,10 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) } tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); - if (!tmp) + if (!tmp) { + err = -ENOMEM; goto error; + } seqhi = esp_tmp_seqhi(tmp); iv = esp_tmp_iv(aead, tmp, seqhilen); From ee40a116ebf139f900c3d2e6febb8388738e96d0 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Thu, 11 Apr 2013 23:00:37 +0200 Subject: [PATCH 220/303] net: mvneta: fix improper tx queue usage in mvneta_tx() mvneta_tx() was using a static tx queue number causing crashes as soon as a little bit of traffic was sent via the interface, because it is normally expected that the same queue should be used as in dev_queue_xmit(). As suggested by Ben Hutchings, let's use skb_get_queue_mapping() to get the proper Tx queue number, and use alloc_etherdev_mqs() instead of alloc_etherdev_mq() to create the queues. Both my Mirabox and my OpenBlocks AX3 used to crash without this patch and don't anymore with it. The issue appeared in 3.8 but became more visible after the fix allowing GSO to be enabled. Original work was done by Dmitri Epshtein and Thomas Petazzoni. I just adapted it to take care of Ben's comments. Signed-off-by: Willy Tarreau Cc: Dmitri Epshtein Cc: Thomas Petazzoni Cc: Gregory CLEMENT Cc: Ben Hutchings Tested-by: Gregory CLEMENT Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 1e628ce57201..a47a097c21e1 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -374,7 +374,6 @@ static int rxq_number = 8; static int txq_number = 8; static int rxq_def; -static int txq_def; #define MVNETA_DRIVER_NAME "mvneta" #define MVNETA_DRIVER_VERSION "1.0" @@ -1475,7 +1474,8 @@ error: static int mvneta_tx(struct sk_buff *skb, struct net_device *dev) { struct mvneta_port *pp = netdev_priv(dev); - struct mvneta_tx_queue *txq = &pp->txqs[txq_def]; + u16 txq_id = skb_get_queue_mapping(skb); + struct mvneta_tx_queue *txq = &pp->txqs[txq_id]; struct mvneta_tx_desc *tx_desc; struct netdev_queue *nq; int frags = 0; @@ -1485,7 +1485,7 @@ static int mvneta_tx(struct sk_buff *skb, struct net_device *dev) goto out; frags = skb_shinfo(skb)->nr_frags + 1; - nq = netdev_get_tx_queue(dev, txq_def); + nq = netdev_get_tx_queue(dev, txq_id); /* Get a descriptor for the first part of the packet */ tx_desc = mvneta_txq_next_desc_get(txq); @@ -2689,7 +2689,7 @@ static int mvneta_probe(struct platform_device *pdev) return -EINVAL; } - dev = alloc_etherdev_mq(sizeof(struct mvneta_port), 8); + dev = alloc_etherdev_mqs(sizeof(struct mvneta_port), txq_number, rxq_number); if (!dev) return -ENOMEM; @@ -2844,4 +2844,3 @@ module_param(rxq_number, int, S_IRUGO); module_param(txq_number, int, S_IRUGO); module_param(rxq_def, int, S_IRUGO); -module_param(txq_def, int, S_IRUGO); From 91c4166c1a01c00b8bed74f7a7defa620071de88 Mon Sep 17 00:00:00 2001 From: Mugunthan V N Date: Mon, 15 Apr 2013 07:31:28 +0000 Subject: [PATCH 221/303] drivers: net: ethernet: cpsw: get slave VLAN id from slave node instead of cpsw node Dual EMAC slave VLAN id must be got from slave node instead of cpsw node as VLAN id for each slave will be different. Reported-by: Mark Jackson Signed-off-by: Mugunthan V N Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 80cad06e5eb2..4781d3d8e182 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1380,7 +1380,7 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, memcpy(slave_data->mac_addr, mac_addr, ETH_ALEN); if (data->dual_emac) { - if (of_property_read_u32(node, "dual_emac_res_vlan", + if (of_property_read_u32(slave_node, "dual_emac_res_vlan", &prop)) { pr_err("Missing dual_emac_res_vlan in DT.\n"); slave_data->dual_emac_res_vlan = i+1; From 0635eb8a54cf0fea64b174bb68bc36b9c3d622db Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Mon, 15 Apr 2013 13:09:45 -0700 Subject: [PATCH 222/303] Move utf16 functions to kernel core and rename We want to be able to use the utf16 functions that are currently present in the EFI variables code in platform-specific code as well. Move them to the kernel core, and in the process rename them to accurately describe what they do - they don't handle UTF16, only UCS2. Signed-off-by: Matthew Garrett Signed-off-by: Matt Fleming --- drivers/firmware/Kconfig | 1 + drivers/firmware/efivars.c | 80 +++++++++---------------------------- include/linux/ucs2_string.h | 14 +++++++ lib/Kconfig | 3 ++ lib/Makefile | 2 + lib/ucs2_string.c | 51 +++++++++++++++++++++++ 6 files changed, 89 insertions(+), 62 deletions(-) create mode 100644 include/linux/ucs2_string.h create mode 100644 lib/ucs2_string.c diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index 42c759a4d047..3e532002e4d1 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -39,6 +39,7 @@ config FIRMWARE_MEMMAP config EFI_VARS tristate "EFI Variable Support via sysfs" depends on EFI + select UCS2_STRING default n help If you say Y here, you are able to get EFI (Extensible Firmware diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index bf15d81d74e1..182ce9471175 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -80,6 +80,7 @@ #include #include #include +#include #include #include @@ -172,51 +173,6 @@ static void efivar_update_sysfs_entries(struct work_struct *); static DECLARE_WORK(efivar_work, efivar_update_sysfs_entries); static bool efivar_wq_enabled = true; -/* Return the number of unicode characters in data */ -static unsigned long -utf16_strnlen(efi_char16_t *s, size_t maxlength) -{ - unsigned long length = 0; - - while (*s++ != 0 && length < maxlength) - length++; - return length; -} - -static inline unsigned long -utf16_strlen(efi_char16_t *s) -{ - return utf16_strnlen(s, ~0UL); -} - -/* - * Return the number of bytes is the length of this string - * Note: this is NOT the same as the number of unicode characters - */ -static inline unsigned long -utf16_strsize(efi_char16_t *data, unsigned long maxlength) -{ - return utf16_strnlen(data, maxlength/sizeof(efi_char16_t)) * sizeof(efi_char16_t); -} - -static inline int -utf16_strncmp(const efi_char16_t *a, const efi_char16_t *b, size_t len) -{ - while (1) { - if (len == 0) - return 0; - if (*a < *b) - return -1; - if (*a > *b) - return 1; - if (*a == 0) /* implies *b == 0 */ - return 0; - a++; - b++; - len--; - } -} - static bool validate_device_path(struct efi_variable *var, int match, u8 *buffer, unsigned long len) @@ -268,7 +224,7 @@ validate_load_option(struct efi_variable *var, int match, u8 *buffer, u16 filepathlength; int i, desclength = 0, namelen; - namelen = utf16_strnlen(var->VariableName, sizeof(var->VariableName)); + namelen = ucs2_strnlen(var->VariableName, sizeof(var->VariableName)); /* Either "Boot" or "Driver" followed by four digits of hex */ for (i = match; i < match+4; i++) { @@ -291,7 +247,7 @@ validate_load_option(struct efi_variable *var, int match, u8 *buffer, * There's no stored length for the description, so it has to be * found by hand */ - desclength = utf16_strsize((efi_char16_t *)(buffer + 6), len - 6) + 2; + desclength = ucs2_strsize((efi_char16_t *)(buffer + 6), len - 6) + 2; /* Each boot entry must have a descriptor */ if (!desclength) @@ -581,7 +537,7 @@ efivar_store_raw(struct efivar_entry *entry, const char *buf, size_t count) spin_lock_irq(&efivars->lock); status = check_var_size_locked(efivars, new_var->Attributes, - new_var->DataSize + utf16_strsize(new_var->VariableName, 1024)); + new_var->DataSize + ucs2_strsize(new_var->VariableName, 1024)); if (status == EFI_SUCCESS || status == EFI_UNSUPPORTED) status = efivars->ops->set_variable(new_var->VariableName, @@ -759,7 +715,7 @@ static ssize_t efivarfs_file_write(struct file *file, * QueryVariableInfo() isn't supported by the firmware. */ - varsize = datasize + utf16_strsize(var->var.VariableName, 1024); + varsize = datasize + ucs2_strsize(var->var.VariableName, 1024); status = check_var_size(efivars, attributes, varsize); if (status != EFI_SUCCESS) { @@ -1211,7 +1167,7 @@ static int efivarfs_fill_super(struct super_block *sb, void *data, int silent) inode = NULL; - len = utf16_strlen(entry->var.VariableName); + len = ucs2_strlen(entry->var.VariableName); /* name, plus '-', plus GUID, plus NUL*/ name = kmalloc(len + 1 + GUID_LEN + 1, GFP_ATOMIC); @@ -1469,8 +1425,8 @@ static int efi_pstore_erase(enum pstore_type_id type, u64 id, int count, if (efi_guidcmp(entry->var.VendorGuid, vendor)) continue; - if (utf16_strncmp(entry->var.VariableName, efi_name, - utf16_strlen(efi_name))) { + if (ucs2_strncmp(entry->var.VariableName, efi_name, + ucs2_strlen(efi_name))) { /* * Check if an old format, * which doesn't support holding @@ -1482,8 +1438,8 @@ static int efi_pstore_erase(enum pstore_type_id type, u64 id, int count, for (i = 0; i < DUMP_NAME_LEN; i++) efi_name_old[i] = name_old[i]; - if (utf16_strncmp(entry->var.VariableName, efi_name_old, - utf16_strlen(efi_name_old))) + if (ucs2_strncmp(entry->var.VariableName, efi_name_old, + ucs2_strlen(efi_name_old))) continue; } @@ -1561,8 +1517,8 @@ static ssize_t efivar_create(struct file *filp, struct kobject *kobj, * Does this variable already exist? */ list_for_each_entry_safe(search_efivar, n, &efivars->list, list) { - strsize1 = utf16_strsize(search_efivar->var.VariableName, 1024); - strsize2 = utf16_strsize(new_var->VariableName, 1024); + strsize1 = ucs2_strsize(search_efivar->var.VariableName, 1024); + strsize2 = ucs2_strsize(new_var->VariableName, 1024); if (strsize1 == strsize2 && !memcmp(&(search_efivar->var.VariableName), new_var->VariableName, strsize1) && @@ -1578,7 +1534,7 @@ static ssize_t efivar_create(struct file *filp, struct kobject *kobj, } status = check_var_size_locked(efivars, new_var->Attributes, - new_var->DataSize + utf16_strsize(new_var->VariableName, 1024)); + new_var->DataSize + ucs2_strsize(new_var->VariableName, 1024)); if (status && status != EFI_UNSUPPORTED) { spin_unlock_irq(&efivars->lock); @@ -1602,7 +1558,7 @@ static ssize_t efivar_create(struct file *filp, struct kobject *kobj, /* Create the entry in sysfs. Locking is not required here */ status = efivar_create_sysfs_entry(efivars, - utf16_strsize(new_var->VariableName, + ucs2_strsize(new_var->VariableName, 1024), new_var->VariableName, &new_var->VendorGuid); @@ -1632,8 +1588,8 @@ static ssize_t efivar_delete(struct file *filp, struct kobject *kobj, * Does this variable already exist? */ list_for_each_entry_safe(search_efivar, n, &efivars->list, list) { - strsize1 = utf16_strsize(search_efivar->var.VariableName, 1024); - strsize2 = utf16_strsize(del_var->VariableName, 1024); + strsize1 = ucs2_strsize(search_efivar->var.VariableName, 1024); + strsize2 = ucs2_strsize(del_var->VariableName, 1024); if (strsize1 == strsize2 && !memcmp(&(search_efivar->var.VariableName), del_var->VariableName, strsize1) && @@ -1679,9 +1635,9 @@ static bool variable_is_present(efi_char16_t *variable_name, efi_guid_t *vendor) unsigned long strsize1, strsize2; bool found = false; - strsize1 = utf16_strsize(variable_name, 1024); + strsize1 = ucs2_strsize(variable_name, 1024); list_for_each_entry_safe(entry, n, &efivars->list, list) { - strsize2 = utf16_strsize(entry->var.VariableName, 1024); + strsize2 = ucs2_strsize(entry->var.VariableName, 1024); if (strsize1 == strsize2 && !memcmp(variable_name, &(entry->var.VariableName), strsize2) && diff --git a/include/linux/ucs2_string.h b/include/linux/ucs2_string.h new file mode 100644 index 000000000000..cbb20afdbc01 --- /dev/null +++ b/include/linux/ucs2_string.h @@ -0,0 +1,14 @@ +#ifndef _LINUX_UCS2_STRING_H_ +#define _LINUX_UCS2_STRING_H_ + +#include /* for size_t */ +#include /* for NULL */ + +typedef u16 ucs2_char_t; + +unsigned long ucs2_strnlen(const ucs2_char_t *s, size_t maxlength); +unsigned long ucs2_strlen(const ucs2_char_t *s); +unsigned long ucs2_strsize(const ucs2_char_t *data, unsigned long maxlength); +int ucs2_strncmp(const ucs2_char_t *a, const ucs2_char_t *b, size_t len); + +#endif /* _LINUX_UCS2_STRING_H_ */ diff --git a/lib/Kconfig b/lib/Kconfig index 3958dc4389f9..fe01d418b09a 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -404,4 +404,7 @@ config OID_REGISTRY help Enable fast lookup object identifier registry. +config UCS2_STRING + tristate + endmenu diff --git a/lib/Makefile b/lib/Makefile index d7946ff75b2e..6e2cc561f761 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -174,3 +174,5 @@ quiet_cmd_build_OID_registry = GEN $@ cmd_build_OID_registry = perl $(srctree)/$(src)/build_OID_registry $< $@ clean-files += oid_registry_data.c + +obj-$(CONFIG_UCS2_STRING) += ucs2_string.o diff --git a/lib/ucs2_string.c b/lib/ucs2_string.c new file mode 100644 index 000000000000..6f500ef2301d --- /dev/null +++ b/lib/ucs2_string.c @@ -0,0 +1,51 @@ +#include +#include + +/* Return the number of unicode characters in data */ +unsigned long +ucs2_strnlen(const ucs2_char_t *s, size_t maxlength) +{ + unsigned long length = 0; + + while (*s++ != 0 && length < maxlength) + length++; + return length; +} +EXPORT_SYMBOL(ucs2_strnlen); + +unsigned long +ucs2_strlen(const ucs2_char_t *s) +{ + return ucs2_strnlen(s, ~0UL); +} +EXPORT_SYMBOL(ucs2_strlen); + +/* + * Return the number of bytes is the length of this string + * Note: this is NOT the same as the number of unicode characters + */ +unsigned long +ucs2_strsize(const ucs2_char_t *data, unsigned long maxlength) +{ + return ucs2_strnlen(data, maxlength/sizeof(ucs2_char_t)) * sizeof(ucs2_char_t); +} +EXPORT_SYMBOL(ucs2_strsize); + +int +ucs2_strncmp(const ucs2_char_t *a, const ucs2_char_t *b, size_t len) +{ + while (1) { + if (len == 0) + return 0; + if (*a < *b) + return -1; + if (*a > *b) + return 1; + if (*a == 0) /* implies *b == 0 */ + return 0; + a++; + b++; + len--; + } +} +EXPORT_SYMBOL(ucs2_strncmp); From cc5a080c5d40c36089bb08a8a16fa3fc7047fe0f Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Mon, 15 Apr 2013 13:09:46 -0700 Subject: [PATCH 223/303] efi: Pass boot services variable info to runtime code EFI variables can be flagged as being accessible only within boot services. This makes it awkward for us to figure out how much space they use at runtime. In theory we could figure this out by simply comparing the results from QueryVariableInfo() to the space used by all of our variables, but that fails if the platform doesn't garbage collect on every boot. Thankfully, calling QueryVariableInfo() while still inside boot services gives a more reliable answer. This patch passes that information from the EFI boot stub up to the efi platform code. Signed-off-by: Matthew Garrett Signed-off-by: Matt Fleming --- arch/x86/boot/compressed/eboot.c | 47 +++++++++++++++++++++++++++ arch/x86/include/asm/efi.h | 7 ++++ arch/x86/include/uapi/asm/bootparam.h | 1 + arch/x86/platform/efi/efi.c | 21 ++++++++++++ 4 files changed, 76 insertions(+) diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index c205035a6b96..8615f7581820 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -251,6 +251,51 @@ static void find_bits(unsigned long mask, u8 *pos, u8 *size) *size = len; } +static efi_status_t setup_efi_vars(struct boot_params *params) +{ + struct setup_data *data; + struct efi_var_bootdata *efidata; + u64 store_size, remaining_size, var_size; + efi_status_t status; + + if (!sys_table->runtime->query_variable_info) + return EFI_UNSUPPORTED; + + data = (struct setup_data *)(unsigned long)params->hdr.setup_data; + + while (data && data->next) + data = (struct setup_data *)(unsigned long)data->next; + + status = efi_call_phys4(sys_table->runtime->query_variable_info, + EFI_VARIABLE_NON_VOLATILE | + EFI_VARIABLE_BOOTSERVICE_ACCESS | + EFI_VARIABLE_RUNTIME_ACCESS, &store_size, + &remaining_size, &var_size); + + if (status != EFI_SUCCESS) + return status; + + status = efi_call_phys3(sys_table->boottime->allocate_pool, + EFI_LOADER_DATA, sizeof(*efidata), &efidata); + + if (status != EFI_SUCCESS) + return status; + + efidata->data.type = SETUP_EFI_VARS; + efidata->data.len = sizeof(struct efi_var_bootdata) - + sizeof(struct setup_data); + efidata->data.next = 0; + efidata->store_size = store_size; + efidata->remaining_size = remaining_size; + efidata->max_var_size = var_size; + + if (data) + data->next = (unsigned long)efidata; + else + params->hdr.setup_data = (unsigned long)efidata; + +} + static efi_status_t setup_efi_pci(struct boot_params *params) { efi_pci_io_protocol *pci; @@ -1157,6 +1202,8 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table, setup_graphics(boot_params); + setup_efi_vars(boot_params); + setup_efi_pci(boot_params); status = efi_call_phys3(sys_table->boottime->allocate_pool, diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 60c89f30c727..2fb5d5884e23 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -102,6 +102,13 @@ extern void efi_call_phys_epilog(void); extern void efi_unmap_memmap(void); extern void efi_memory_uc(u64 addr, unsigned long size); +struct efi_var_bootdata { + struct setup_data data; + u64 store_size; + u64 remaining_size; + u64 max_var_size; +}; + #ifdef CONFIG_EFI static inline bool efi_is_native(void) diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index c15ddaf90710..08744242b8d2 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -6,6 +6,7 @@ #define SETUP_E820_EXT 1 #define SETUP_DTB 2 #define SETUP_PCI 3 +#define SETUP_EFI_VARS 4 /* ram_size flags */ #define RAMDISK_IMAGE_START_MASK 0x07FF diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 3f96a487aa2a..977d1ce7e173 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -69,6 +69,10 @@ struct efi_memory_map memmap; static struct efi efi_phys __initdata; static efi_system_table_t efi_systab __initdata; +static u64 efi_var_store_size; +static u64 efi_var_remaining_size; +static u64 efi_var_max_var_size; + unsigned long x86_efi_facility; /* @@ -682,6 +686,9 @@ void __init efi_init(void) char vendor[100] = "unknown"; int i = 0; void *tmp; + struct setup_data *data; + struct efi_var_bootdata *efi_var_data; + u64 pa_data; #ifdef CONFIG_X86_32 if (boot_params.efi_info.efi_systab_hi || @@ -699,6 +706,20 @@ void __init efi_init(void) if (efi_systab_init(efi_phys.systab)) return; + pa_data = boot_params.hdr.setup_data; + while (pa_data) { + data = early_ioremap(pa_data, sizeof(*efi_var_data)); + if (data->type == SETUP_EFI_VARS) { + efi_var_data = (struct efi_var_bootdata *)data; + + efi_var_store_size = efi_var_data->store_size; + efi_var_remaining_size = efi_var_data->remaining_size; + efi_var_max_var_size = efi_var_data->max_var_size; + } + pa_data = data->next; + early_iounmap(data, sizeof(*efi_var_data)); + } + set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility); /* From 31ff2f20d9003e74991d135f56e503fe776c127c Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Mon, 15 Apr 2013 13:09:47 -0700 Subject: [PATCH 224/303] efi: Distinguish between "remaining space" and actually used space EFI implementations distinguish between space that is actively used by a variable and space that merely hasn't been garbage collected yet. Space that hasn't yet been garbage collected isn't available for use and so isn't counted in the remaining_space field returned by QueryVariableInfo(). Combined with commit 68d9298 this can cause problems. Some implementations don't garbage collect until the remaining space is smaller than the maximum variable size, and as a result check_var_size() will always fail once more than 50% of the variable store has been used even if most of that space is marked as available for garbage collection. The user is unable to create new variables, and deleting variables doesn't increase the remaining space. The problem that 68d9298 was attempting to avoid was one where certain platforms fail if the actively used space is greater than 50% of the available storage space. We should be able to calculate that by simply summing the size of each available variable and subtracting that from the total storage space. With luck this will fix the problem described in https://bugzilla.kernel.org/show_bug.cgi?id=55471 without permitting damage to occur to the machines 68d9298 was attempting to fix. Signed-off-by: Matthew Garrett Signed-off-by: Matt Fleming --- arch/x86/platform/efi/efi.c | 106 ++++++++++++++++++++++++++++++++++-- 1 file changed, 100 insertions(+), 6 deletions(-) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 977d1ce7e173..4959e3f89d74 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -51,6 +52,13 @@ #define EFI_DEBUG 1 +/* + * There's some additional metadata associated with each + * variable. Intel's reference implementation is 60 bytes - bump that + * to account for potential alignment constraints + */ +#define VAR_METADATA_SIZE 64 + struct efi __read_mostly efi = { .mps = EFI_INVALID_TABLE_ADDR, .acpi = EFI_INVALID_TABLE_ADDR, @@ -72,6 +80,9 @@ static efi_system_table_t efi_systab __initdata; static u64 efi_var_store_size; static u64 efi_var_remaining_size; static u64 efi_var_max_var_size; +static u64 boot_used_size; +static u64 boot_var_size; +static u64 active_size; unsigned long x86_efi_facility; @@ -166,8 +177,53 @@ static efi_status_t virt_efi_get_next_variable(unsigned long *name_size, efi_char16_t *name, efi_guid_t *vendor) { - return efi_call_virt3(get_next_variable, - name_size, name, vendor); + efi_status_t status; + static bool finished = false; + static u64 var_size; + + status = efi_call_virt3(get_next_variable, + name_size, name, vendor); + + if (status == EFI_NOT_FOUND) { + finished = true; + if (var_size < boot_used_size) { + boot_var_size = boot_used_size - var_size; + active_size += boot_var_size; + } else { + printk(KERN_WARNING FW_BUG "efi: Inconsistent initial sizes\n"); + } + } + + if (boot_used_size && !finished) { + unsigned long size; + u32 attr; + efi_status_t s; + void *tmp; + + s = virt_efi_get_variable(name, vendor, &attr, &size, NULL); + + if (s != EFI_BUFFER_TOO_SMALL || !size) + return status; + + tmp = kmalloc(size, GFP_ATOMIC); + + if (!tmp) + return status; + + s = virt_efi_get_variable(name, vendor, &attr, &size, tmp); + + if (s == EFI_SUCCESS && (attr & EFI_VARIABLE_NON_VOLATILE)) { + var_size += size; + var_size += ucs2_strsize(name, 1024); + active_size += size; + active_size += VAR_METADATA_SIZE; + active_size += ucs2_strsize(name, 1024); + } + + kfree(tmp); + } + + return status; } static efi_status_t virt_efi_set_variable(efi_char16_t *name, @@ -176,9 +232,34 @@ static efi_status_t virt_efi_set_variable(efi_char16_t *name, unsigned long data_size, void *data) { - return efi_call_virt5(set_variable, - name, vendor, attr, - data_size, data); + efi_status_t status; + u32 orig_attr = 0; + unsigned long orig_size = 0; + + status = virt_efi_get_variable(name, vendor, &orig_attr, &orig_size, + NULL); + + if (status != EFI_BUFFER_TOO_SMALL) + orig_size = 0; + + status = efi_call_virt5(set_variable, + name, vendor, attr, + data_size, data); + + if (status == EFI_SUCCESS) { + if (orig_size) { + active_size -= orig_size; + active_size -= ucs2_strsize(name, 1024); + active_size -= VAR_METADATA_SIZE; + } + if (data_size) { + active_size += data_size; + active_size += ucs2_strsize(name, 1024); + active_size += VAR_METADATA_SIZE; + } + } + + return status; } static efi_status_t virt_efi_query_variable_info(u32 attr, @@ -720,6 +801,8 @@ void __init efi_init(void) early_iounmap(data, sizeof(*efi_var_data)); } + boot_used_size = efi_var_store_size - efi_var_remaining_size; + set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility); /* @@ -1042,10 +1125,21 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) if (!max_size && remaining_size > size) printk_once(KERN_ERR FW_BUG "Broken EFI implementation" " is returning MaxVariableSize=0\n"); + /* + * Some firmware implementations refuse to boot if there's insufficient + * space in the variable store. We account for that by refusing the + * write if permitting it would reduce the available space to under + * 50%. However, some firmware won't reclaim variable space until + * after the used (not merely the actively used) space drops below + * a threshold. We can approximate that case with the value calculated + * above. If both the firmware and our calculations indicate that the + * available space would drop below 50%, refuse the write. + */ if (!storage_size || size > remaining_size || (max_size && size > max_size) || - (remaining_size - size) < (storage_size / 2)) + ((active_size + size + VAR_METADATA_SIZE > storage_size / 2) && + (remaining_size - size < storage_size / 2))) return EFI_OUT_OF_RESOURCES; return EFI_SUCCESS; From f1923820c447e986a9da0fc6bf60c1dccdf0408e Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 16 Apr 2013 13:51:43 +0200 Subject: [PATCH 225/303] perf/x86: Fix offcore_rsp valid mask for SNB/IVB The valid mask for both offcore_response_0 and offcore_response_1 was wrong for SNB/SNB-EP, IVB/IVB-EP. It was possible to write to reserved bit and cause a GP fault crashing the kernel. This patch fixes the problem by correctly marking the reserved bits in the valid mask for all the processors mentioned above. A distinction between desktop and server parts is introduced because bits 24-30 are only available on the server parts. This version of the patch is just a rebase to perf/urgent tree and should apply to older kernels as well. Signed-off-by: Stephane Eranian Cc: peterz@infradead.org Cc: jolsa@redhat.com Cc: gregkh@linuxfoundation.org Cc: security@kernel.org Cc: ak@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index dab7580c47ae..cc45deb791b0 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -153,8 +153,14 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly = }; static struct extra_reg intel_snb_extra_regs[] __read_mostly = { - INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), - INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), + INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0), + INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1), + EVENT_EXTRA_END +}; + +static struct extra_reg intel_snbep_extra_regs[] __read_mostly = { + INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0), + INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1), EVENT_EXTRA_END }; @@ -2097,7 +2103,10 @@ __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_snb_event_constraints; x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; x86_pmu.pebs_aliases = intel_pebs_aliases_snb; - x86_pmu.extra_regs = intel_snb_extra_regs; + if (boot_cpu_data.x86_model == 45) + x86_pmu.extra_regs = intel_snbep_extra_regs; + else + x86_pmu.extra_regs = intel_snb_extra_regs; /* all extra regs are per-cpu when HT is on */ x86_pmu.er_flags |= ERF_HAS_RSP_1; x86_pmu.er_flags |= ERF_NO_HT_SHARING; @@ -2123,7 +2132,10 @@ __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_ivb_event_constraints; x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints; x86_pmu.pebs_aliases = intel_pebs_aliases_snb; - x86_pmu.extra_regs = intel_snb_extra_regs; + if (boot_cpu_data.x86_model == 62) + x86_pmu.extra_regs = intel_snbep_extra_regs; + else + x86_pmu.extra_regs = intel_snb_extra_regs; /* all extra regs are per-cpu when HT is on */ x86_pmu.er_flags |= ERF_HAS_RSP_1; x86_pmu.er_flags |= ERF_NO_HT_SHARING; From f6ce5002629e08eaa777ced3872a98f76845143e Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Tue, 16 Apr 2013 18:31:08 +0400 Subject: [PATCH 226/303] x86/Kconfig: Make EFI select UCS2_STRING The commit "efi: Distinguish between "remaining space" and actually used space" added usage of ucs2_*() functions to arch/x86/platform/efi/efi.c, but the only thing which selected UCS2_STRING was EFI_VARS, which is technically optional and can be built as a module. Signed-off-by: Sergey Vlasov Signed-off-by: Matt Fleming --- arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a4f24f5b1218..01af8535f58e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1549,6 +1549,7 @@ config X86_SMAP config EFI bool "EFI runtime service support" depends on ACPI + select UCS2_STRING ---help--- This enables the kernel to use EFI runtime services that are available (such as the EFI variable services). From 3668011d4ad556224f7c012c1e870a6eaa0e59da Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Tue, 16 Apr 2013 18:31:09 +0400 Subject: [PATCH 227/303] efi: Export efi_query_variable_store() for efivars.ko Fixes build with CONFIG_EFI_VARS=m which was broken after the commit "x86, efivars: firmware bug workarounds should be in platform code". Signed-off-by: Sergey Vlasov Signed-off-by: Matt Fleming --- arch/x86/platform/efi/efi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 4959e3f89d74..4f364c7c6111 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -1144,3 +1144,4 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) return EFI_SUCCESS; } +EXPORT_SYMBOL_GPL(efi_query_variable_store); From 32b161aa88aa40a83888a995c6e2ef81140219b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Tue, 16 Apr 2013 00:17:07 +0000 Subject: [PATCH 228/303] net: cdc_mbim: remove bogus sizeof() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The intention was to test against the constant, not the size of the constant. Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_mbim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index 16c842997291..6bd91676d2cb 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -134,7 +134,7 @@ static struct sk_buff *cdc_mbim_tx_fixup(struct usbnet *dev, struct sk_buff *skb goto error; if (skb) { - if (skb->len <= sizeof(ETH_HLEN)) + if (skb->len <= ETH_HLEN) goto error; /* mapping VLANs to MBIM sessions: From 8d7ed0f051caf192994c02fbefb859eac03d1646 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Tue, 16 Apr 2013 00:42:42 +0000 Subject: [PATCH 229/303] net: fec: fix regression in link change accounting A link-down isn't properly saved in the FEC state, so we wouldn't restart the FEC after a repeated link-up. Regression was introduced with commit d97e7497 "net: fec: restart the FEC when PHY speed changes" Signed-off-by: Lucas Stach Tested-by: Fabio Estevam Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/freescale/fec.c b/drivers/net/ethernet/freescale/fec.c index f292c3aa423f..73195f643c9c 100644 --- a/drivers/net/ethernet/freescale/fec.c +++ b/drivers/net/ethernet/freescale/fec.c @@ -1002,6 +1002,7 @@ static void fec_enet_adjust_link(struct net_device *ndev) } else { if (fep->link) { fec_stop(ndev); + fep->link = phy_dev->link; status_change = 1; } } From ca46e10fb239d4646cb19620695473f252a6ffc7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 3 Apr 2013 10:43:13 +0100 Subject: [PATCH 230/303] ARM: KVM: fix KVM_CAP_ARM_SET_DEVICE_ADDR reporting Commit 3401d54696f9 (KVM: ARM: Introduce KVM_ARM_SET_DEVICE_ADDR ioctl) added support for the KVM_CAP_ARM_SET_DEVICE_ADDR capability, but failed to add a break in the relevant case statement, returning the number of CPUs instead. Luckilly enough, the CONFIG_NR_CPUS=0 patch hasn't been merged yet (https://lkml.org/lkml/diff/2012/3/31/131/1), so the bug wasn't noticed. Just give it a break! Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 5a936988eb24..c1fe498983ac 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -201,6 +201,7 @@ int kvm_dev_ioctl_check_extension(long ext) break; case KVM_CAP_ARM_SET_DEVICE_ADDR: r = 1; + break; case KVM_CAP_NR_VCPUS: r = num_online_cpus(); break; From 865499ea90d399e0682bcce3ae7af24277633699 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 12 Apr 2013 14:00:16 +0100 Subject: [PATCH 231/303] ARM: KVM: fix L_PTE_S2_RDWR to actually be Read/Write Looks like our L_PTE_S2_RDWR definition is slightly wrong, and is actually write only (see ARM ARM Table B3-9, Stage 2 control of access permissions). Didn't make a difference for normal pages, as we OR the flags together, but I'm still wondering how it worked for Stage-2 mapped devices, such as the GIC. Brown paper bag time, again. Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/include/asm/pgtable-3level.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 6ef8afd1b64c..86b8fe398b95 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -111,7 +111,7 @@ #define L_PTE_S2_MT_WRITETHROUGH (_AT(pteval_t, 0xa) << 2) /* MemAttr[3:0] */ #define L_PTE_S2_MT_WRITEBACK (_AT(pteval_t, 0xf) << 2) /* MemAttr[3:0] */ #define L_PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[1] */ -#define L_PTE_S2_RDWR (_AT(pteval_t, 2) << 6) /* HAP[2:1] */ +#define L_PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */ /* * Hyp-mode PL2 PTE definitions for LPAE. From b4cbb197c7e7a68dbad0d491242e3ca67420c13e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 16 Apr 2013 13:45:37 -0700 Subject: [PATCH 232/303] vm: add vm_iomap_memory() helper function Various drivers end up replicating the code to mmap() their memory buffers into user space, and our core memory remapping function may be very flexible but it is unnecessarily complicated for the common cases to use. Our internal VM uses pfn's ("page frame numbers") which simplifies things for the VM, and allows us to pass physical addresses around in a denser and more efficient format than passing a "phys_addr_t" around, and having to shift it up and down by the page size. But it just means that drivers end up doing that shifting instead at the interface level. It also means that drivers end up mucking around with internal VM things like the vma details (vm_pgoff, vm_start/end) way more than they really need to. So this just exports a function to map a certain physical memory range into user space (using a phys_addr_t based interface that is much more natural for a driver) and hides all the complexity from the driver. Some drivers will still end up tweaking the vm_page_prot details for things like prefetching or cacheability etc, but that's actually relevant to the driver, rather than caring about what the page offset of the mapping is into the particular IO memory region. Acked-by: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 ++ mm/memory.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index e19ff30ad0a2..e2091b88d24c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1611,6 +1611,8 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn); int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn); +int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len); + struct page *follow_page_mask(struct vm_area_struct *vma, unsigned long address, unsigned int foll_flags, diff --git a/mm/memory.c b/mm/memory.c index 13cbc420fead..ba94dec5b259 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2393,6 +2393,53 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, } EXPORT_SYMBOL(remap_pfn_range); +/** + * vm_iomap_memory - remap memory to userspace + * @vma: user vma to map to + * @start: start of area + * @len: size of area + * + * This is a simplified io_remap_pfn_range() for common driver use. The + * driver just needs to give us the physical memory range to be mapped, + * we'll figure out the rest from the vma information. + * + * NOTE! Some drivers might want to tweak vma->vm_page_prot first to get + * whatever write-combining details or similar. + */ +int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len) +{ + unsigned long vm_len, pfn, pages; + + /* Check that the physical memory area passed in looks valid */ + if (start + len < start) + return -EINVAL; + /* + * You *really* shouldn't map things that aren't page-aligned, + * but we've historically allowed it because IO memory might + * just have smaller alignment. + */ + len += start & ~PAGE_MASK; + pfn = start >> PAGE_SHIFT; + pages = (len + ~PAGE_MASK) >> PAGE_SHIFT; + if (pfn + pages < pfn) + return -EINVAL; + + /* We start the mapping 'vm_pgoff' pages into the area */ + if (vma->vm_pgoff > pages) + return -EINVAL; + pfn += vma->vm_pgoff; + pages -= vma->vm_pgoff; + + /* Can we fit all of the mapping? */ + vm_len = vma->vm_end - vma->vm_start; + if (vm_len >> PAGE_SHIFT > pages) + return -EINVAL; + + /* Ok, let it rip */ + return io_remap_pfn_range(vma, vma->vm_start, pfn, vm_len, vma->vm_page_prot); +} +EXPORT_SYMBOL(vm_iomap_memory); + static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr, unsigned long end, pte_fn_t fn, void *data) From 97599dc792b45b1669c3cdb9a4b365aad0232f65 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 16 Apr 2013 12:55:41 +0000 Subject: [PATCH 233/303] net: drop dst before queueing fragments Commit 4a94445c9a5c (net: Use ip_route_input_noref() in input path) added a bug in IP defragmentation handling, as non refcounted dst could escape an RCU protected section. Commit 64f3b9e203bd068 (net: ip_expire() must revalidate route) fixed the case of timeouts, but not the general problem. Tom Parkin noticed crashes in UDP stack and provided a patch, but further analysis permitted us to pinpoint the root cause. Before queueing a packet into a frag list, we must drop its dst, as this dst has limited lifetime (RCU protected) When/if a packet is finally reassembled, we use the dst of the very last skb, still protected by RCU and valid, as the dst of the reassembled packet. Use same logic in IPv6, as there is no need to hold dst references. Reported-by: Tom Parkin Tested-by: Tom Parkin Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_fragment.c | 14 ++++++++++---- net/ipv6/reassembly.c | 12 ++++++++++-- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index a6445b843ef4..52c273ea05c3 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -248,8 +248,7 @@ static void ip_expire(unsigned long arg) if (!head->dev) goto out_rcu_unlock; - /* skb dst is stale, drop it, and perform route lookup again */ - skb_dst_drop(head); + /* skb has no dst, perform route lookup again */ iph = ip_hdr(head); err = ip_route_input_noref(head, iph->daddr, iph->saddr, iph->tos, head->dev); @@ -523,9 +522,16 @@ found: qp->q.max_size = skb->len + ihl; if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && - qp->q.meat == qp->q.len) - return ip_frag_reasm(qp, prev, dev); + qp->q.meat == qp->q.len) { + unsigned long orefdst = skb->_skb_refdst; + skb->_skb_refdst = 0UL; + err = ip_frag_reasm(qp, prev, dev); + skb->_skb_refdst = orefdst; + return err; + } + + skb_dst_drop(skb); inet_frag_lru_move(&qp->q); return -EINPROGRESS; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 196ab9347ad1..0ba10e53a629 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -330,9 +330,17 @@ found: } if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && - fq->q.meat == fq->q.len) - return ip6_frag_reasm(fq, prev, dev); + fq->q.meat == fq->q.len) { + int res; + unsigned long orefdst = skb->_skb_refdst; + skb->_skb_refdst = 0UL; + res = ip6_frag_reasm(fq, prev, dev); + skb->_skb_refdst = orefdst; + return res; + } + + skb_dst_drop(skb); inet_frag_lru_move(&fq->q); return -1; From 361cd29cf9363505c2a35bbf9a034a22feebfb07 Mon Sep 17 00:00:00 2001 From: Himanshu Madhani Date: Tue, 16 Apr 2013 18:11:10 +0000 Subject: [PATCH 234/303] qlcnic: fix beaconing test for 82xx adapter o Commit 319ecf121e1da3d75dd1bde32fed255532e61797 ("qlcnic: 83xx sysfs routines") introduced regression for beaconing test while refactoring 82xx code. This patch is to revert code to fix beaconing test for 82xx adapter. Signed-off-by: Himanshu Madhani Signed-off-by: Shahed Shaikh Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c index 987fb6f8adc3..5ef328af61d0 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c @@ -200,10 +200,10 @@ beacon_err: } err = qlcnic_config_led(adapter, b_state, b_rate); - if (!err) + if (!err) { err = len; - else ahw->beacon_state = b_state; + } if (test_and_clear_bit(__QLCNIC_DIAG_RES_ALLOC, &adapter->state)) qlcnic_diag_free_res(adapter->netdev, max_sds_rings); From 4c82456eeb4da081dd63dc69e91aa6deabd29e03 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 17 Apr 2013 12:30:40 +0200 Subject: [PATCH 235/303] fuse: fix type definitions in uapi header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 7e98d53086d18c877cb44e9065219335184024de (Synchronize fuse header with one used in library) added #ifdef __linux__ around defines if it is not set. The kernel build is self-contained and can be built on non-Linux toolchains. After the mentioned commit builds on non-Linux toolchains will try to include stdint.h and fail due to -nostdinc, and then fail with a bunch of undefined type errors. Fix by checking for __KERNEL__ instead of __linux__ and using the standard int types instead of the linux specific ones. Reported-by: Arve Hjønnevåg Reported-by: Colin Cross Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 436 +++++++++++++++++++------------------- 1 file changed, 216 insertions(+), 220 deletions(-) diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 4c43b4448792..706d035fa748 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -95,15 +95,10 @@ #ifndef _LINUX_FUSE_H #define _LINUX_FUSE_H -#ifdef __linux__ +#ifdef __KERNEL__ #include #else #include -#define __u64 uint64_t -#define __s64 int64_t -#define __u32 uint32_t -#define __s32 int32_t -#define __u16 uint16_t #endif /* @@ -139,42 +134,42 @@ userspace works under 64bit kernels */ struct fuse_attr { - __u64 ino; - __u64 size; - __u64 blocks; - __u64 atime; - __u64 mtime; - __u64 ctime; - __u32 atimensec; - __u32 mtimensec; - __u32 ctimensec; - __u32 mode; - __u32 nlink; - __u32 uid; - __u32 gid; - __u32 rdev; - __u32 blksize; - __u32 padding; + uint64_t ino; + uint64_t size; + uint64_t blocks; + uint64_t atime; + uint64_t mtime; + uint64_t ctime; + uint32_t atimensec; + uint32_t mtimensec; + uint32_t ctimensec; + uint32_t mode; + uint32_t nlink; + uint32_t uid; + uint32_t gid; + uint32_t rdev; + uint32_t blksize; + uint32_t padding; }; struct fuse_kstatfs { - __u64 blocks; - __u64 bfree; - __u64 bavail; - __u64 files; - __u64 ffree; - __u32 bsize; - __u32 namelen; - __u32 frsize; - __u32 padding; - __u32 spare[6]; + uint64_t blocks; + uint64_t bfree; + uint64_t bavail; + uint64_t files; + uint64_t ffree; + uint32_t bsize; + uint32_t namelen; + uint32_t frsize; + uint32_t padding; + uint32_t spare[6]; }; struct fuse_file_lock { - __u64 start; - __u64 end; - __u32 type; - __u32 pid; /* tgid */ + uint64_t start; + uint64_t end; + uint32_t type; + uint32_t pid; /* tgid */ }; /** @@ -364,143 +359,143 @@ enum fuse_notify_code { #define FUSE_COMPAT_ENTRY_OUT_SIZE 120 struct fuse_entry_out { - __u64 nodeid; /* Inode ID */ - __u64 generation; /* Inode generation: nodeid:gen must - be unique for the fs's lifetime */ - __u64 entry_valid; /* Cache timeout for the name */ - __u64 attr_valid; /* Cache timeout for the attributes */ - __u32 entry_valid_nsec; - __u32 attr_valid_nsec; + uint64_t nodeid; /* Inode ID */ + uint64_t generation; /* Inode generation: nodeid:gen must + be unique for the fs's lifetime */ + uint64_t entry_valid; /* Cache timeout for the name */ + uint64_t attr_valid; /* Cache timeout for the attributes */ + uint32_t entry_valid_nsec; + uint32_t attr_valid_nsec; struct fuse_attr attr; }; struct fuse_forget_in { - __u64 nlookup; + uint64_t nlookup; }; struct fuse_forget_one { - __u64 nodeid; - __u64 nlookup; + uint64_t nodeid; + uint64_t nlookup; }; struct fuse_batch_forget_in { - __u32 count; - __u32 dummy; + uint32_t count; + uint32_t dummy; }; struct fuse_getattr_in { - __u32 getattr_flags; - __u32 dummy; - __u64 fh; + uint32_t getattr_flags; + uint32_t dummy; + uint64_t fh; }; #define FUSE_COMPAT_ATTR_OUT_SIZE 96 struct fuse_attr_out { - __u64 attr_valid; /* Cache timeout for the attributes */ - __u32 attr_valid_nsec; - __u32 dummy; + uint64_t attr_valid; /* Cache timeout for the attributes */ + uint32_t attr_valid_nsec; + uint32_t dummy; struct fuse_attr attr; }; #define FUSE_COMPAT_MKNOD_IN_SIZE 8 struct fuse_mknod_in { - __u32 mode; - __u32 rdev; - __u32 umask; - __u32 padding; + uint32_t mode; + uint32_t rdev; + uint32_t umask; + uint32_t padding; }; struct fuse_mkdir_in { - __u32 mode; - __u32 umask; + uint32_t mode; + uint32_t umask; }; struct fuse_rename_in { - __u64 newdir; + uint64_t newdir; }; struct fuse_link_in { - __u64 oldnodeid; + uint64_t oldnodeid; }; struct fuse_setattr_in { - __u32 valid; - __u32 padding; - __u64 fh; - __u64 size; - __u64 lock_owner; - __u64 atime; - __u64 mtime; - __u64 unused2; - __u32 atimensec; - __u32 mtimensec; - __u32 unused3; - __u32 mode; - __u32 unused4; - __u32 uid; - __u32 gid; - __u32 unused5; + uint32_t valid; + uint32_t padding; + uint64_t fh; + uint64_t size; + uint64_t lock_owner; + uint64_t atime; + uint64_t mtime; + uint64_t unused2; + uint32_t atimensec; + uint32_t mtimensec; + uint32_t unused3; + uint32_t mode; + uint32_t unused4; + uint32_t uid; + uint32_t gid; + uint32_t unused5; }; struct fuse_open_in { - __u32 flags; - __u32 unused; + uint32_t flags; + uint32_t unused; }; struct fuse_create_in { - __u32 flags; - __u32 mode; - __u32 umask; - __u32 padding; + uint32_t flags; + uint32_t mode; + uint32_t umask; + uint32_t padding; }; struct fuse_open_out { - __u64 fh; - __u32 open_flags; - __u32 padding; + uint64_t fh; + uint32_t open_flags; + uint32_t padding; }; struct fuse_release_in { - __u64 fh; - __u32 flags; - __u32 release_flags; - __u64 lock_owner; + uint64_t fh; + uint32_t flags; + uint32_t release_flags; + uint64_t lock_owner; }; struct fuse_flush_in { - __u64 fh; - __u32 unused; - __u32 padding; - __u64 lock_owner; + uint64_t fh; + uint32_t unused; + uint32_t padding; + uint64_t lock_owner; }; struct fuse_read_in { - __u64 fh; - __u64 offset; - __u32 size; - __u32 read_flags; - __u64 lock_owner; - __u32 flags; - __u32 padding; + uint64_t fh; + uint64_t offset; + uint32_t size; + uint32_t read_flags; + uint64_t lock_owner; + uint32_t flags; + uint32_t padding; }; #define FUSE_COMPAT_WRITE_IN_SIZE 24 struct fuse_write_in { - __u64 fh; - __u64 offset; - __u32 size; - __u32 write_flags; - __u64 lock_owner; - __u32 flags; - __u32 padding; + uint64_t fh; + uint64_t offset; + uint32_t size; + uint32_t write_flags; + uint64_t lock_owner; + uint32_t flags; + uint32_t padding; }; struct fuse_write_out { - __u32 size; - __u32 padding; + uint32_t size; + uint32_t padding; }; #define FUSE_COMPAT_STATFS_SIZE 48 @@ -510,32 +505,32 @@ struct fuse_statfs_out { }; struct fuse_fsync_in { - __u64 fh; - __u32 fsync_flags; - __u32 padding; + uint64_t fh; + uint32_t fsync_flags; + uint32_t padding; }; struct fuse_setxattr_in { - __u32 size; - __u32 flags; + uint32_t size; + uint32_t flags; }; struct fuse_getxattr_in { - __u32 size; - __u32 padding; + uint32_t size; + uint32_t padding; }; struct fuse_getxattr_out { - __u32 size; - __u32 padding; + uint32_t size; + uint32_t padding; }; struct fuse_lk_in { - __u64 fh; - __u64 owner; + uint64_t fh; + uint64_t owner; struct fuse_file_lock lk; - __u32 lk_flags; - __u32 padding; + uint32_t lk_flags; + uint32_t padding; }; struct fuse_lk_out { @@ -543,134 +538,135 @@ struct fuse_lk_out { }; struct fuse_access_in { - __u32 mask; - __u32 padding; + uint32_t mask; + uint32_t padding; }; struct fuse_init_in { - __u32 major; - __u32 minor; - __u32 max_readahead; - __u32 flags; + uint32_t major; + uint32_t minor; + uint32_t max_readahead; + uint32_t flags; }; struct fuse_init_out { - __u32 major; - __u32 minor; - __u32 max_readahead; - __u32 flags; - __u16 max_background; - __u16 congestion_threshold; - __u32 max_write; + uint32_t major; + uint32_t minor; + uint32_t max_readahead; + uint32_t flags; + uint16_t max_background; + uint16_t congestion_threshold; + uint32_t max_write; }; #define CUSE_INIT_INFO_MAX 4096 struct cuse_init_in { - __u32 major; - __u32 minor; - __u32 unused; - __u32 flags; + uint32_t major; + uint32_t minor; + uint32_t unused; + uint32_t flags; }; struct cuse_init_out { - __u32 major; - __u32 minor; - __u32 unused; - __u32 flags; - __u32 max_read; - __u32 max_write; - __u32 dev_major; /* chardev major */ - __u32 dev_minor; /* chardev minor */ - __u32 spare[10]; + uint32_t major; + uint32_t minor; + uint32_t unused; + uint32_t flags; + uint32_t max_read; + uint32_t max_write; + uint32_t dev_major; /* chardev major */ + uint32_t dev_minor; /* chardev minor */ + uint32_t spare[10]; }; struct fuse_interrupt_in { - __u64 unique; + uint64_t unique; }; struct fuse_bmap_in { - __u64 block; - __u32 blocksize; - __u32 padding; + uint64_t block; + uint32_t blocksize; + uint32_t padding; }; struct fuse_bmap_out { - __u64 block; + uint64_t block; }; struct fuse_ioctl_in { - __u64 fh; - __u32 flags; - __u32 cmd; - __u64 arg; - __u32 in_size; - __u32 out_size; + uint64_t fh; + uint32_t flags; + uint32_t cmd; + uint64_t arg; + uint32_t in_size; + uint32_t out_size; }; struct fuse_ioctl_iovec { - __u64 base; - __u64 len; + uint64_t base; + uint64_t len; }; struct fuse_ioctl_out { - __s32 result; - __u32 flags; - __u32 in_iovs; - __u32 out_iovs; + int32_t result; + uint32_t flags; + uint32_t in_iovs; + uint32_t out_iovs; }; struct fuse_poll_in { - __u64 fh; - __u64 kh; - __u32 flags; - __u32 events; + uint64_t fh; + uint64_t kh; + uint32_t flags; + uint32_t events; }; struct fuse_poll_out { - __u32 revents; - __u32 padding; + uint32_t revents; + uint32_t padding; }; struct fuse_notify_poll_wakeup_out { - __u64 kh; + uint64_t kh; }; struct fuse_fallocate_in { - __u64 fh; - __u64 offset; - __u64 length; - __u32 mode; - __u32 padding; + uint64_t fh; + uint64_t offset; + uint64_t length; + uint32_t mode; + uint32_t padding; }; struct fuse_in_header { - __u32 len; - __u32 opcode; - __u64 unique; - __u64 nodeid; - __u32 uid; - __u32 gid; - __u32 pid; - __u32 padding; + uint32_t len; + uint32_t opcode; + uint64_t unique; + uint64_t nodeid; + uint32_t uid; + uint32_t gid; + uint32_t pid; + uint32_t padding; }; struct fuse_out_header { - __u32 len; - __s32 error; - __u64 unique; + uint32_t len; + int32_t error; + uint64_t unique; }; struct fuse_dirent { - __u64 ino; - __u64 off; - __u32 namelen; - __u32 type; + uint64_t ino; + uint64_t off; + uint32_t namelen; + uint32_t type; char name[]; }; #define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name) -#define FUSE_DIRENT_ALIGN(x) (((x) + sizeof(__u64) - 1) & ~(sizeof(__u64) - 1)) +#define FUSE_DIRENT_ALIGN(x) \ + (((x) + sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1)) #define FUSE_DIRENT_SIZE(d) \ FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen) @@ -685,47 +681,47 @@ struct fuse_direntplus { FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET_DIRENTPLUS + (d)->dirent.namelen) struct fuse_notify_inval_inode_out { - __u64 ino; - __s64 off; - __s64 len; + uint64_t ino; + int64_t off; + int64_t len; }; struct fuse_notify_inval_entry_out { - __u64 parent; - __u32 namelen; - __u32 padding; + uint64_t parent; + uint32_t namelen; + uint32_t padding; }; struct fuse_notify_delete_out { - __u64 parent; - __u64 child; - __u32 namelen; - __u32 padding; + uint64_t parent; + uint64_t child; + uint32_t namelen; + uint32_t padding; }; struct fuse_notify_store_out { - __u64 nodeid; - __u64 offset; - __u32 size; - __u32 padding; + uint64_t nodeid; + uint64_t offset; + uint32_t size; + uint32_t padding; }; struct fuse_notify_retrieve_out { - __u64 notify_unique; - __u64 nodeid; - __u64 offset; - __u32 size; - __u32 padding; + uint64_t notify_unique; + uint64_t nodeid; + uint64_t offset; + uint32_t size; + uint32_t padding; }; /* Matches the size of fuse_write_in */ struct fuse_notify_retrieve_in { - __u64 dummy1; - __u64 offset; - __u32 size; - __u32 dummy2; - __u64 dummy3; - __u64 dummy4; + uint64_t dummy1; + uint64_t offset; + uint32_t size; + uint32_t dummy2; + uint64_t dummy3; + uint64_t dummy4; }; #endif /* _LINUX_FUSE_H */ From 8c58bf3eec3b8fc8162fe557e9361891c20758f2 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Wed, 17 Apr 2013 01:00:53 +0200 Subject: [PATCH 236/303] x86,efi: Implement efi_no_storage_paranoia parameter Using this parameter one can disable the storage_size/2 check if he is really sure that the UEFI does sane gc and fulfills the spec. This parameter is useful if a devices uses more than 50% of the storage by default. The Intel DQSW67 desktop board is such a sucker for exmaple. Signed-off-by: Richard Weinberger Signed-off-by: Matt Fleming --- Documentation/kernel-parameters.txt | 6 ++++++ arch/x86/platform/efi/efi.c | 14 +++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4609e81dbc37..d1cc3a9fa14f 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -788,6 +788,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. edd= [EDD] Format: {"off" | "on" | "skip[mbr]"} + efi_no_storage_paranoia [EFI; X86] + Using this parameter you can use more than 50% of + your efi variable storage. Use this parameter only if + you are really sure that your UEFI does sane gc and + fulfills the spec otherwise your board may brick. + eisa_irq_edge= [PARISC,HW] See header of drivers/parisc/eisa.c. diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 4f364c7c6111..e4a86a677ce1 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -113,6 +113,15 @@ static int __init setup_add_efi_memmap(char *arg) } early_param("add_efi_memmap", setup_add_efi_memmap); +static bool efi_no_storage_paranoia; + +static int __init setup_storage_paranoia(char *arg) +{ + efi_no_storage_paranoia = true; + return 0; +} +early_param("efi_no_storage_paranoia", setup_storage_paranoia); + static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) { @@ -1137,7 +1146,10 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) */ if (!storage_size || size > remaining_size || - (max_size && size > max_size) || + (max_size && size > max_size)) + return EFI_OUT_OF_RESOURCES; + + if (!efi_no_storage_paranoia && ((active_size + size + VAR_METADATA_SIZE > storage_size / 2) && (remaining_size - size < storage_size / 2))) return EFI_OUT_OF_RESOURCES; From 4f2e29031e6c67802e7370292dd050fd62f337ee Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 17 Apr 2013 08:46:19 -0700 Subject: [PATCH 237/303] s390: move dummy io_remap_pfn_range() to asm/pgtable.h Commit b4cbb197c7e7 ("vm: add vm_iomap_memory() helper function") added a helper function wrapper around io_remap_pfn_range(), and every other architecture defined it in . The s390 choice of may make sense, but is not very convenient for this case, and gratuitous differences like that cause unexpected errors like this: mm/memory.c: In function 'vm_iomap_memory': mm/memory.c:2439:2: error: implicit declaration of function 'io_remap_pfn_range' [-Werror=implicit-function-declaration] Glory be the kbuild test robot who noticed this, bisected it, and reported it to the guilty parties (ie me). Cc: Martin Schwidefsky Cc: Heiko Carstens Signed-off-by: Linus Torvalds --- arch/s390/include/asm/io.h | 4 ---- arch/s390/include/asm/pgtable.h | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h index 27cb32185ce1..379d96e2105e 100644 --- a/arch/s390/include/asm/io.h +++ b/arch/s390/include/asm/io.h @@ -50,10 +50,6 @@ void unxlate_dev_mem_ptr(unsigned long phys, void *addr); #define ioremap_nocache(addr, size) ioremap(addr, size) #define ioremap_wc ioremap_nocache -/* TODO: s390 cannot support io_remap_pfn_range... */ -#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ - remap_pfn_range(vma, vaddr, pfn, size, prot) - static inline void __iomem *ioremap(unsigned long offset, unsigned long size) { return (void __iomem *) offset; diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 4a5443118cfb..3cb47cf02530 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -57,6 +57,10 @@ extern unsigned long zero_page_mask; (((unsigned long)(vaddr)) &zero_page_mask)))) #define __HAVE_COLOR_ZERO_PAGE +/* TODO: s390 cannot support io_remap_pfn_range... */ +#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ + remap_pfn_range(vma, vaddr, pfn, size, prot) + #endif /* !__ASSEMBLY__ */ /* From f5d6a1441a5045824f36ff7c6b6bbae0373472a6 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Wed, 3 Apr 2013 22:28:41 +0100 Subject: [PATCH 238/303] ARM: 7692/1: iop3xx: move IOP3XX_PERIPHERAL_VIRT_BASE Currently IOP3XX_PERIPHERAL_VIRT_BASE conflicts with PCI_IO_VIRT_BASE: address size PCI_IO_VIRT_BASE 0xfee00000 0x200000 IOP3XX_PERIPHERAL_VIRT_BASE 0xfeffe000 0x2000 Fix by moving IOP3XX_PERIPHERAL_VIRT_BASE below PCI_IO_VIRT_BASE. The patch fixes the following kernel panic with 3.9-rc1 on iop3xx boards: [ 0.000000] Booting Linux on physical CPU 0x0 [ 0.000000] Initializing cgroup subsys cpu [ 0.000000] Linux version 3.9.0-rc1-iop32x (aaro@blackmetal) (gcc version 4.7.2 (GCC) ) #20 PREEMPT Tue Mar 5 16:44:36 EET 2013 [ 0.000000] bootconsole [earlycon0] enabled [ 0.000000] ------------[ cut here ]------------ [ 0.000000] kernel BUG at mm/vmalloc.c:1145! [ 0.000000] Internal error: Oops - BUG: 0 [#1] PREEMPT ARM [ 0.000000] Modules linked in: [ 0.000000] CPU: 0 Not tainted (3.9.0-rc1-iop32x #20) [ 0.000000] PC is at vm_area_add_early+0x4c/0x88 [ 0.000000] LR is at add_static_vm_early+0x14/0x68 [ 0.000000] pc : [] lr : [] psr: 800000d3 [ 0.000000] sp : c03ffee4 ip : dfffdf88 fp : c03ffef4 [ 0.000000] r10: 00000002 r9 : 000000cf r8 : 00000653 [ 0.000000] r7 : c040eca8 r6 : c03e2408 r5 : dfffdf60 r4 : 00200000 [ 0.000000] r3 : dfffdfd8 r2 : feffe000 r1 : ff000000 r0 : dfffdf60 [ 0.000000] Flags: Nzcv IRQs off FIQs off Mode SVC_32 ISA ARM Segment kernel [ 0.000000] Control: 0000397f Table: a0004000 DAC: 00000017 [ 0.000000] Process swapper (pid: 0, stack limit = 0xc03fe1b8) [ 0.000000] Stack: (0xc03ffee4 to 0xc0400000) [ 0.000000] fee0: 00200000 c03fff0c c03ffef8 c03e1c40 c03e7468 00200000 fee00000 [ 0.000000] ff00: c03fff2c c03fff10 c03e23e4 c03e1c38 feffe000 c0408ee4 ff000000 c0408f04 [ 0.000000] ff20: c03fff3c c03fff30 c03e2434 c03e23b4 c03fff84 c03fff40 c03e2c94 c03e2414 [ 0.000000] ff40: c03f8878 c03f6410 ffff0000 000bffff 00001000 00000008 c03fff84 c03f6410 [ 0.000000] ff60: c04227e8 c03fffd4 a0008000 c03f8878 69052e30 c02f96eb c03fffbc c03fff88 [ 0.000000] ff80: c03e044c c03e268c 00000000 0000397f c0385130 00000001 ffffffff c03f8874 [ 0.000000] ffa0: dfffffff a0004000 69052e30 a03f61a0 c03ffff4 c03fffc0 c03dd5cc c03e0184 [ 0.000000] ffc0: 00000000 00000000 00000000 00000000 00000000 c03f8878 0000397d c040601c [ 0.000000] ffe0: c03f8874 c0408674 00000000 c03ffff8 a0008040 c03dd558 00000000 00000000 [ 0.000000] Backtrace: [ 0.000000] [] (vm_area_add_early+0x0/0x88) from [] (add_static_vm_early+0x14/0x68) Tested-by: Mikael Pettersson Signed-off-by: Aaro Koskinen Signed-off-by: Russell King --- arch/arm/include/asm/hardware/iop3xx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/include/asm/hardware/iop3xx.h b/arch/arm/include/asm/hardware/iop3xx.h index 02fe2fbe2477..ed94b1a366ae 100644 --- a/arch/arm/include/asm/hardware/iop3xx.h +++ b/arch/arm/include/asm/hardware/iop3xx.h @@ -37,7 +37,7 @@ extern int iop3xx_get_init_atu(void); * IOP3XX processor registers */ #define IOP3XX_PERIPHERAL_PHYS_BASE 0xffffe000 -#define IOP3XX_PERIPHERAL_VIRT_BASE 0xfeffe000 +#define IOP3XX_PERIPHERAL_VIRT_BASE 0xfedfe000 #define IOP3XX_PERIPHERAL_SIZE 0x00002000 #define IOP3XX_PERIPHERAL_UPPER_PA (IOP3XX_PERIPHERAL_PHYS_BASE +\ IOP3XX_PERIPHERAL_SIZE - 1) From de40614e92bf1b0308d953387b0cb9d3a5710186 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Fri, 5 Apr 2013 03:16:51 +0100 Subject: [PATCH 239/303] ARM: 7694/1: ARM, TCM: initialize TCM in paging_init(), instead of setup_arch() tcm_init() call iotable_init() and it use early_alloc variants which do memblock allocation. Directly using memblock allocation after initializing bootmem should not permitted, because bootmem can't know where are additinally reserved. So move tcm_init() to a safe place before initalizing bootmem. (On the U300) Tested-by: Linus Walleij Signed-off-by: Joonsoo Kim Signed-off-by: Russell King --- arch/arm/kernel/setup.c | 3 --- arch/arm/kernel/tcm.c | 1 - arch/arm/mm/mmu.c | 2 ++ arch/arm/{kernel => mm}/tcm.h | 0 4 files changed, 2 insertions(+), 4 deletions(-) rename arch/arm/{kernel => mm}/tcm.h (100%) diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index d343a6c3a6d1..234e339196c0 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -56,7 +56,6 @@ #include #include "atags.h" -#include "tcm.h" #if defined(CONFIG_FPE_NWFPE) || defined(CONFIG_FPE_FASTFPE) @@ -798,8 +797,6 @@ void __init setup_arch(char **cmdline_p) reserve_crashkernel(); - tcm_init(); - #ifdef CONFIG_MULTI_IRQ_HANDLER handle_arch_irq = mdesc->handle_irq; #endif diff --git a/arch/arm/kernel/tcm.c b/arch/arm/kernel/tcm.c index 30ae6bb4a310..f50f19e5c138 100644 --- a/arch/arm/kernel/tcm.c +++ b/arch/arm/kernel/tcm.c @@ -17,7 +17,6 @@ #include #include #include -#include "tcm.h" static struct gen_pool *tcm_pool; static bool dtcm_present; diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 78978945492a..a84ff763ac39 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -34,6 +34,7 @@ #include #include "mm.h" +#include "tcm.h" /* * empty_zero_page is a special page that is used for @@ -1277,6 +1278,7 @@ void __init paging_init(struct machine_desc *mdesc) dma_contiguous_remap(); devicemaps_init(mdesc); kmap_init(); + tcm_init(); top_pmd = pmd_off_k(0xffff0000); diff --git a/arch/arm/kernel/tcm.h b/arch/arm/mm/tcm.h similarity index 100% rename from arch/arm/kernel/tcm.h rename to arch/arm/mm/tcm.h From cd272d1ea71583170e95dde02c76166c7f9017e6 Mon Sep 17 00:00:00 2001 From: Illia Ragozin Date: Wed, 10 Apr 2013 19:43:34 +0100 Subject: [PATCH 240/303] ARM: 7696/1: Fix kexec by setting outer_cache.inv_all for Feroceon On Feroceon the L2 cache becomes non-coherent with the CPU when the L1 caches are disabled. Thus the L2 needs to be invalidated after both L1 caches are disabled. On kexec before the starting the code for relocation the kernel, the L1 caches are disabled in cpu_froc_fin (cpu_v7_proc_fin for Feroceon), but after L2 cache is never invalidated, because inv_all is not set in cache-feroceon-l2.c. So kernel relocation and decompression may has (and usually has) errors. Setting the function enables L2 invalidation and fixes the issue. Cc: Signed-off-by: Illia Ragozin Acked-by: Jason Cooper Signed-off-by: Russell King --- arch/arm/mm/cache-feroceon-l2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mm/cache-feroceon-l2.c b/arch/arm/mm/cache-feroceon-l2.c index dd3d59122cc3..48bc3c0a87ce 100644 --- a/arch/arm/mm/cache-feroceon-l2.c +++ b/arch/arm/mm/cache-feroceon-l2.c @@ -343,6 +343,7 @@ void __init feroceon_l2_init(int __l2_wt_override) outer_cache.inv_range = feroceon_l2_inv_range; outer_cache.clean_range = feroceon_l2_clean_range; outer_cache.flush_range = feroceon_l2_flush_range; + outer_cache.inv_all = l2_inv_all; enable_l2(); From 50acff3c1f9ee9753684e676929b82926f15966c Mon Sep 17 00:00:00 2001 From: Bastian Hecht Date: Fri, 12 Apr 2013 19:03:50 +0100 Subject: [PATCH 241/303] ARM: 7697/1: hw_breakpoint: do not use __cpuinitdata for dbg_cpu_pm_nb We must not declare dbg_cpu_pm_nb as __cpuinitdata as we need it after system initialization for Suspend and CPUIdle. This was done in commit 9a6eb310eaa5 ("ARM: hw_breakpoint: Debug powerdown support for self-hosted debug"). Cc: stable@vger.kernel.org Cc: Dietmar Eggemann Signed-off-by: Bastian Hecht Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/kernel/hw_breakpoint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 5dc1aa6f0f7d..1fd749ee4a1b 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -1043,7 +1043,7 @@ static int dbg_cpu_pm_notify(struct notifier_block *self, unsigned long action, return NOTIFY_OK; } -static struct notifier_block __cpuinitdata dbg_cpu_pm_nb = { +static struct notifier_block dbg_cpu_pm_nb = { .notifier_call = dbg_cpu_pm_notify, }; From cb2d8b342aa084d1f3ac29966245dec9163677fb Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 12 Apr 2013 19:04:19 +0100 Subject: [PATCH 242/303] ARM: 7698/1: perf: fix group validation when using enable_on_exec Events may be created with attr->disabled == 1 and attr->enable_on_exec == 1, which confuses the group validation code because events with the PERF_EVENT_STATE_OFF are not considered candidates for scheduling, which may lead to failure at group scheduling time. This patch fixes the validation check for ARM, so that events in the OFF state are still considered when enable_on_exec is true. Cc: stable@vger.kernel.org Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Reported-by: Sudeep KarkadaNagesha Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/kernel/perf_event.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 146157dfe27c..8c3094d0f7b7 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -253,7 +253,10 @@ validate_event(struct pmu_hw_events *hw_events, struct arm_pmu *armpmu = to_arm_pmu(event->pmu); struct pmu *leader_pmu = event->group_leader->pmu; - if (event->pmu != leader_pmu || event->state <= PERF_EVENT_STATE_OFF) + if (event->pmu != leader_pmu || event->state < PERF_EVENT_STATE_OFF) + return 1; + + if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec) return 1; return armpmu->get_event_idx(hw_events, event) >= 0; From 46faeed4a61e220b99591e9773057160eb437cc8 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 10 Apr 2013 17:47:46 -0500 Subject: [PATCH 243/303] rbd: do a safe list traversal in rbd_img_request_submit() It's possible that the reference to the object request dropped inside the loop in rbd_img_request_submit() will be the last one, in which case the content of the object pointer can't be trusted. Use a safe form of the object request list traversal to avoid problems. This resolves: http://tracker.ceph.com/issues/4705 Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index f556f8a8b3f9..b7b7a88d9f68 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1742,9 +1742,10 @@ static int rbd_img_request_submit(struct rbd_img_request *img_request) struct rbd_device *rbd_dev = img_request->rbd_dev; struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; struct rbd_obj_request *obj_request; + struct rbd_obj_request *next_obj_request; dout("%s: img %p\n", __func__, img_request); - for_each_obj_request(img_request, obj_request) { + for_each_obj_request_safe(img_request, obj_request, next_obj_request) { int ret; obj_request->callback = rbd_img_obj_callback; From c729de8fcea37a1c444e81857eace12494c804a9 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 15 Apr 2013 22:23:45 -0700 Subject: [PATCH 244/303] x86, kdump: Set crashkernel_low automatically Chao said that kdump does does work well on his system on 3.8 without extra parameter, even iommu does not work with kdump. And now have to append crashkernel_low=Y in first kernel to make kdump work. We have now modified crashkernel=X to allocate memory beyong 4G (if available) and do not allocate low range for crashkernel if the user does not specify that with crashkernel_low=Y. This causes regression if iommu is not enabled. Without iommu, swiotlb needs to be setup in first 4G and there is no low memory available to second kernel. Set crashkernel_low automatically if the user does not specify that. For system that does support IOMMU with kdump properly, user could specify crashkernel_low=0 to save that 72M low ram. -v3: add swiotlb_size() according to Konrad. -v4: add comments what 8M is for according to hpa. also update more crashkernel_low= in kernel-parameters.txt -v5: update changelog according to Vivek. -v6: Change description about swiotlb referring according to HATAYAMA. Reported-by: WANG Chao Tested-by: WANG Chao Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1366089828-19692-2-git-send-email-yinghai@kernel.org Acked-by: Vivek Goyal Signed-off-by: H. Peter Anvin --- Documentation/kernel-parameters.txt | 14 +++++++++++--- arch/x86/kernel/setup.c | 21 ++++++++++++++++++--- include/linux/swiotlb.h | 1 + lib/swiotlb.c | 19 +++++++++++++++---- 4 files changed, 45 insertions(+), 10 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4609e81dbc37..cff672da2486 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -596,9 +596,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. is selected automatically. Check Documentation/kdump/kdump.txt for further details. - crashkernel_low=size[KMG] - [KNL, x86] parts under 4G. - crashkernel=range1:size1[,range2:size2,...][@offset] [KNL] Same as above, but depends on the memory in the running system. The syntax of range is @@ -606,6 +603,17 @@ bytes respectively. Such letter suffixes can also be entirely omitted. a memory unit (amount[KMG]). See also Documentation/kdump/kdump.txt for an example. + crashkernel_low=size[KMG] + [KNL, x86_64] range under 4G. When crashkernel= is + passed, kernel allocate physical memory region + above 4G, that cause second kernel crash on system + that require some amount of low memory, e.g. swiotlb + requires at least 64M+32K low memory. Kernel would + try to allocate 72M below 4G automatically. + This one let user to specify own low range under 4G + for second kernel instead. + 0: to disable low allocation. + cs89x0_dma= [HW,NET] Format: diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 90d8cc930f5e..12349202cae7 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -521,19 +521,34 @@ static void __init reserve_crashkernel_low(void) unsigned long long low_base = 0, low_size = 0; unsigned long total_low_mem; unsigned long long base; + bool auto_set = false; int ret; total_low_mem = memblock_mem_size(1UL<<(32-PAGE_SHIFT)); ret = parse_crashkernel_low(boot_command_line, total_low_mem, &low_size, &base); - if (ret != 0 || low_size <= 0) - return; + if (ret != 0) { + /* + * two parts from lib/swiotlb.c: + * swiotlb size: user specified with swiotlb= or default. + * swiotlb overflow buffer: now is hardcoded to 32k. + * We round it to 8M for other buffers that + * may need to stay low too. + */ + low_size = swiotlb_size_or_default() + (8UL<<20); + auto_set = true; + } else { + /* passed with crashkernel_low=0 ? */ + if (!low_size) + return; + } low_base = memblock_find_in_range(low_size, (1ULL<<32), low_size, alignment); if (!low_base) { - pr_info("crashkernel low reservation failed - No suitable area found.\n"); + if (!auto_set) + pr_info("crashkernel low reservation failed - No suitable area found.\n"); return; } diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 2de42f9401d2..a5ffd32642fd 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -25,6 +25,7 @@ extern int swiotlb_force; extern void swiotlb_init(int verbose); int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose); extern unsigned long swiotlb_nr_tbl(void); +unsigned long swiotlb_size_or_default(void); extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs); /* diff --git a/lib/swiotlb.c b/lib/swiotlb.c index bfe02b8fc55b..d23762e6652c 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -105,9 +105,9 @@ setup_io_tlb_npages(char *str) if (!strcmp(str, "force")) swiotlb_force = 1; - return 1; + return 0; } -__setup("swiotlb=", setup_io_tlb_npages); +early_param("swiotlb", setup_io_tlb_npages); /* make io_tlb_overflow tunable too? */ unsigned long swiotlb_nr_tbl(void) @@ -115,6 +115,18 @@ unsigned long swiotlb_nr_tbl(void) return io_tlb_nslabs; } EXPORT_SYMBOL_GPL(swiotlb_nr_tbl); + +/* default to 64MB */ +#define IO_TLB_DEFAULT_SIZE (64UL<<20) +unsigned long swiotlb_size_or_default(void) +{ + unsigned long size; + + size = io_tlb_nslabs << IO_TLB_SHIFT; + + return size ? size : (IO_TLB_DEFAULT_SIZE); +} + /* Note that this doesn't work with highmem page */ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, volatile void *address) @@ -188,8 +200,7 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) void __init swiotlb_init(int verbose) { - /* default to 64MB */ - size_t default_size = 64UL<<20; + size_t default_size = IO_TLB_DEFAULT_SIZE; unsigned char *vstart; unsigned long bytes; From 55a20ee7804ab64ac90bcdd4e2868a42829e2784 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 15 Apr 2013 22:23:47 -0700 Subject: [PATCH 245/303] x86, kdump: Retore crashkernel= to allocate under 896M Vivek found old kexec-tools does not work new kernel anymore. So change back crashkernel= back to old behavoir, and add crashkernel_high= to let user decide if buffer could be above 4G, and also new kexec-tools will be needed. -v2: let crashkernel=X override crashkernel_high= update description about _high will be ignored by crashkernel=X -v3: update description about kernel-parameters.txt according to Vivek. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1366089828-19692-4-git-send-email-yinghai@kernel.org Acked-by: Vivek Goyal Signed-off-by: H. Peter Anvin --- Documentation/kernel-parameters.txt | 13 +++++++++++-- arch/x86/kernel/setup.c | 24 +++++++++++++++++++----- include/linux/kexec.h | 2 ++ kernel/kexec.c | 9 +++++++++ 4 files changed, 41 insertions(+), 7 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index cff672da2486..709eb3edc6b2 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -603,9 +603,16 @@ bytes respectively. Such letter suffixes can also be entirely omitted. a memory unit (amount[KMG]). See also Documentation/kdump/kdump.txt for an example. + crashkernel_high=size[KMG] + [KNL, x86_64] range could be above 4G. Allow kernel + to allocate physical memory region from top, so could + be above 4G if system have more than 4G ram installed. + Otherwise memory region will be allocated below 4G, if + available. + It will be ignored if crashkernel=X is specified. crashkernel_low=size[KMG] - [KNL, x86_64] range under 4G. When crashkernel= is - passed, kernel allocate physical memory region + [KNL, x86_64] range under 4G. When crashkernel_high= is + passed, kernel could allocate physical memory region above 4G, that cause second kernel crash on system that require some amount of low memory, e.g. swiotlb requires at least 64M+32K low memory. Kernel would @@ -613,6 +620,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. This one let user to specify own low range under 4G for second kernel instead. 0: to disable low allocation. + It will be ignored when crashkernel_high=X is not used + or memory reserved is below 4G. cs89x0_dma= [HW,NET] Format: diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 12349202cae7..a85a144f2052 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -507,11 +507,14 @@ static void __init memblock_x86_reserve_range_setup_data(void) /* * Keep the crash kernel below this limit. On 32 bits earlier kernels * would limit the kernel to the low 512 MiB due to mapping restrictions. + * On 64bit, old kexec-tools need to under 896MiB. */ #ifdef CONFIG_X86_32 -# define CRASH_KERNEL_ADDR_MAX (512 << 20) +# define CRASH_KERNEL_ADDR_LOW_MAX (512 << 20) +# define CRASH_KERNEL_ADDR_HIGH_MAX (512 << 20) #else -# define CRASH_KERNEL_ADDR_MAX MAXMEM +# define CRASH_KERNEL_ADDR_LOW_MAX (896UL<<20) +# define CRASH_KERNEL_ADDR_HIGH_MAX MAXMEM #endif static void __init reserve_crashkernel_low(void) @@ -525,6 +528,7 @@ static void __init reserve_crashkernel_low(void) int ret; total_low_mem = memblock_mem_size(1UL<<(32-PAGE_SHIFT)); + /* crashkernel_low=YM */ ret = parse_crashkernel_low(boot_command_line, total_low_mem, &low_size, &base); if (ret != 0) { @@ -569,14 +573,22 @@ static void __init reserve_crashkernel(void) const unsigned long long alignment = 16<<20; /* 16M */ unsigned long long total_mem; unsigned long long crash_size, crash_base; + bool high = false; int ret; total_mem = memblock_phys_mem_size(); + /* crashkernel=XM */ ret = parse_crashkernel(boot_command_line, total_mem, &crash_size, &crash_base); - if (ret != 0 || crash_size <= 0) - return; + if (ret != 0 || crash_size <= 0) { + /* crashkernel_high=XM */ + ret = parse_crashkernel_high(boot_command_line, total_mem, + &crash_size, &crash_base); + if (ret != 0 || crash_size <= 0) + return; + high = true; + } /* 0 means: find the address automatically */ if (crash_base <= 0) { @@ -584,7 +596,9 @@ static void __init reserve_crashkernel(void) * kexec want bzImage is below CRASH_KERNEL_ADDR_MAX */ crash_base = memblock_find_in_range(alignment, - CRASH_KERNEL_ADDR_MAX, crash_size, alignment); + high ? CRASH_KERNEL_ADDR_HIGH_MAX : + CRASH_KERNEL_ADDR_LOW_MAX, + crash_size, alignment); if (!crash_base) { pr_info("crashkernel reservation failed - No suitable area found.\n"); diff --git a/include/linux/kexec.h b/include/linux/kexec.h index d2e6927bbaae..d78d28a733b1 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -200,6 +200,8 @@ extern size_t vmcoreinfo_max_size; int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base); +int parse_crashkernel_high(char *cmdline, unsigned long long system_ram, + unsigned long long *crash_size, unsigned long long *crash_base); int parse_crashkernel_low(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base); int crash_shrink_memory(unsigned long new_size); diff --git a/kernel/kexec.c b/kernel/kexec.c index bddd3d7a74b6..1b2f73f5f9b9 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1422,6 +1422,15 @@ int __init parse_crashkernel(char *cmdline, "crashkernel="); } +int __init parse_crashkernel_high(char *cmdline, + unsigned long long system_ram, + unsigned long long *crash_size, + unsigned long long *crash_base) +{ + return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, + "crashkernel_high="); +} + int __init parse_crashkernel_low(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, From adbc742bf78695bb98c79d18c558b61571748b99 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 15 Apr 2013 22:23:48 -0700 Subject: [PATCH 246/303] x86, kdump: Change crashkernel_high/low= to crashkernel=,high/low Per hpa, use crashkernel=X,high crashkernel=Y,low instead of crashkernel_hign=X crashkernel_low=Y. As that could be extensible. -v2: according to Vivek, change delimiter to ; -v3: let hign and low only handle simple form and it conforms to description in kernel-parameters.txt still keep crashkernel=X override any crashkernel=X,high crashkernel=Y,low -v4: update get_last_crashkernel returning and add more strict checking in parse_crashkernel_simple() found by HATAYAMA. -v5: Change delimiter back to , according to HPA. also separate parse_suffix from parse_simper according to vivek. so we can avoid @pos in that path. -v6: Tight the checking about crashkernel=X,highblahblah,high found by HTYAYAMA. Cc: HATAYAMA Daisuke Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1366089828-19692-5-git-send-email-yinghai@kernel.org Acked-by: Vivek Goyal Signed-off-by: H. Peter Anvin --- Documentation/kernel-parameters.txt | 10 +-- arch/x86/kernel/setup.c | 6 +- kernel/kexec.c | 109 ++++++++++++++++++++++++---- 3 files changed, 104 insertions(+), 21 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 709eb3edc6b2..a1ac1f1d6d34 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -603,16 +603,16 @@ bytes respectively. Such letter suffixes can also be entirely omitted. a memory unit (amount[KMG]). See also Documentation/kdump/kdump.txt for an example. - crashkernel_high=size[KMG] + crashkernel=size[KMG],high [KNL, x86_64] range could be above 4G. Allow kernel to allocate physical memory region from top, so could be above 4G if system have more than 4G ram installed. Otherwise memory region will be allocated below 4G, if available. It will be ignored if crashkernel=X is specified. - crashkernel_low=size[KMG] - [KNL, x86_64] range under 4G. When crashkernel_high= is - passed, kernel could allocate physical memory region + crashkernel=size[KMG],low + [KNL, x86_64] range under 4G. When crashkernel=X,high + is passed, kernel could allocate physical memory region above 4G, that cause second kernel crash on system that require some amount of low memory, e.g. swiotlb requires at least 64M+32K low memory. Kernel would @@ -620,7 +620,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. This one let user to specify own low range under 4G for second kernel instead. 0: to disable low allocation. - It will be ignored when crashkernel_high=X is not used + It will be ignored when crashkernel=X,high is not used or memory reserved is below 4G. cs89x0_dma= [HW,NET] diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index a85a144f2052..fae9134a2de9 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -528,7 +528,7 @@ static void __init reserve_crashkernel_low(void) int ret; total_low_mem = memblock_mem_size(1UL<<(32-PAGE_SHIFT)); - /* crashkernel_low=YM */ + /* crashkernel=Y,low */ ret = parse_crashkernel_low(boot_command_line, total_low_mem, &low_size, &base); if (ret != 0) { @@ -542,7 +542,7 @@ static void __init reserve_crashkernel_low(void) low_size = swiotlb_size_or_default() + (8UL<<20); auto_set = true; } else { - /* passed with crashkernel_low=0 ? */ + /* passed with crashkernel=0,low ? */ if (!low_size) return; } @@ -582,7 +582,7 @@ static void __init reserve_crashkernel(void) ret = parse_crashkernel(boot_command_line, total_mem, &crash_size, &crash_base); if (ret != 0 || crash_size <= 0) { - /* crashkernel_high=XM */ + /* crashkernel=X,high */ ret = parse_crashkernel_high(boot_command_line, total_mem, &crash_size, &crash_base); if (ret != 0 || crash_size <= 0) diff --git a/kernel/kexec.c b/kernel/kexec.c index 1b2f73f5f9b9..401fdb041f35 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1368,35 +1368,114 @@ static int __init parse_crashkernel_simple(char *cmdline, return 0; } +#define SUFFIX_HIGH 0 +#define SUFFIX_LOW 1 +#define SUFFIX_NULL 2 +static __initdata char *suffix_tbl[] = { + [SUFFIX_HIGH] = ",high", + [SUFFIX_LOW] = ",low", + [SUFFIX_NULL] = NULL, +}; + /* - * That function is the entry point for command line parsing and should be - * called from the arch-specific code. + * That function parses "suffix" crashkernel command lines like + * + * crashkernel=size,[high|low] + * + * It returns 0 on success and -EINVAL on failure. */ +static int __init parse_crashkernel_suffix(char *cmdline, + unsigned long long *crash_size, + unsigned long long *crash_base, + const char *suffix) +{ + char *cur = cmdline; + + *crash_size = memparse(cmdline, &cur); + if (cmdline == cur) { + pr_warn("crashkernel: memory value expected\n"); + return -EINVAL; + } + + /* check with suffix */ + if (strncmp(cur, suffix, strlen(suffix))) { + pr_warn("crashkernel: unrecognized char\n"); + return -EINVAL; + } + cur += strlen(suffix); + if (*cur != ' ' && *cur != '\0') { + pr_warn("crashkernel: unrecognized char\n"); + return -EINVAL; + } + + return 0; +} + +static __init char *get_last_crashkernel(char *cmdline, + const char *name, + const char *suffix) +{ + char *p = cmdline, *ck_cmdline = NULL; + + /* find crashkernel and use the last one if there are more */ + p = strstr(p, name); + while (p) { + char *end_p = strchr(p, ' '); + char *q; + + if (!end_p) + end_p = p + strlen(p); + + if (!suffix) { + int i; + + /* skip the one with any known suffix */ + for (i = 0; suffix_tbl[i]; i++) { + q = end_p - strlen(suffix_tbl[i]); + if (!strncmp(q, suffix_tbl[i], + strlen(suffix_tbl[i]))) + goto next; + } + ck_cmdline = p; + } else { + q = end_p - strlen(suffix); + if (!strncmp(q, suffix, strlen(suffix))) + ck_cmdline = p; + } +next: + p = strstr(p+1, name); + } + + if (!ck_cmdline) + return NULL; + + return ck_cmdline; +} + static int __init __parse_crashkernel(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base, - const char *name) + const char *name, + const char *suffix) { - char *p = cmdline, *ck_cmdline = NULL; char *first_colon, *first_space; + char *ck_cmdline; BUG_ON(!crash_size || !crash_base); *crash_size = 0; *crash_base = 0; - /* find crashkernel and use the last one if there are more */ - p = strstr(p, name); - while (p) { - ck_cmdline = p; - p = strstr(p+1, name); - } + ck_cmdline = get_last_crashkernel(cmdline, name, suffix); if (!ck_cmdline) return -EINVAL; ck_cmdline += strlen(name); + if (suffix) + return parse_crashkernel_suffix(ck_cmdline, crash_size, + crash_base, suffix); /* * if the commandline contains a ':', then that's the extended * syntax -- if not, it must be the classic syntax @@ -1413,13 +1492,17 @@ static int __init __parse_crashkernel(char *cmdline, return 0; } +/* + * That function is the entry point for command line parsing and should be + * called from the arch-specific code. + */ int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base) { return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, - "crashkernel="); + "crashkernel=", NULL); } int __init parse_crashkernel_high(char *cmdline, @@ -1428,7 +1511,7 @@ int __init parse_crashkernel_high(char *cmdline, unsigned long long *crash_base) { return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, - "crashkernel_high="); + "crashkernel=", suffix_tbl[SUFFIX_HIGH]); } int __init parse_crashkernel_low(char *cmdline, @@ -1437,7 +1520,7 @@ int __init parse_crashkernel_low(char *cmdline, unsigned long long *crash_base) { return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, - "crashkernel_low="); + "crashkernel=", suffix_tbl[SUFFIX_LOW]); } static void update_vmcoreinfo_note(void) From 157752d84f5df47e01577970f9c5f61a0b9f4546 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 15 Apr 2013 22:23:46 -0700 Subject: [PATCH 247/303] kexec: use Crash kernel for Crash kernel low We can extend kexec-tools to support multiple "Crash kernel" in /proc/iomem instead. So we can use "Crash kernel" instead of "Crash kernel low" in /proc/iomem. Suggested-by: Vivek Goyal Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1366089828-19692-3-git-send-email-yinghai@kernel.org Acked-by: Vivek Goyal Signed-off-by: H. Peter Anvin --- kernel/kexec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/kexec.c b/kernel/kexec.c index 401fdb041f35..ffd4e111fd67 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -55,7 +55,7 @@ struct resource crashk_res = { .flags = IORESOURCE_BUSY | IORESOURCE_MEM }; struct resource crashk_low_res = { - .name = "Crash kernel low", + .name = "Crash kernel", .start = 0, .end = 0, .flags = IORESOURCE_BUSY | IORESOURCE_MEM From 15bbc1b28ff65767922f78c266821cc138b90a47 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 17 Apr 2013 12:09:09 -0700 Subject: [PATCH 248/303] ARM: KVM: fix unbalanced get_cpu() in access_dcsw In the very unlikely event where a guest would be foolish enough to *read* from a write-only cache maintainance register, we end up with preemption disabled, due to a misplaced get_cpu(). Just move the "is_write" test outside of the critical section. Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall Signed-off-by: Linus Torvalds --- arch/arm/kvm/coproc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 4ea9a982269c..7bed7556077a 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -79,11 +79,11 @@ static bool access_dcsw(struct kvm_vcpu *vcpu, u32 val; int cpu; - cpu = get_cpu(); - if (!p->is_write) return read_from_write_only(vcpu, p); + cpu = get_cpu(); + cpumask_setall(&vcpu->arch.require_dcache_flush); cpumask_clear_cpu(cpu, &vcpu->arch.require_dcache_flush); From fe8a93b95145c66adf196eea4a919dfe0b7c57db Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Wed, 3 Apr 2013 19:10:26 +0200 Subject: [PATCH 249/303] batman-adv: make is_my_mac() check for the current mesh only On a multi-mesh node (a node running more than one batman-adv virtual interface) batadv_is_my_mac() has to check MAC addresses of hard interfaces belonging to the current mesh only. Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/main.c | 5 +++- net/batman-adv/main.h | 2 +- net/batman-adv/routing.c | 38 ++++++++++++++++-------------- net/batman-adv/translation-table.c | 2 +- net/batman-adv/vis.c | 4 ++-- 5 files changed, 28 insertions(+), 23 deletions(-) diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 0488d70c8c35..fa563e497c48 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -169,7 +169,7 @@ void batadv_mesh_free(struct net_device *soft_iface) atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE); } -int batadv_is_my_mac(const uint8_t *addr) +int batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr) { const struct batadv_hard_iface *hard_iface; @@ -178,6 +178,9 @@ int batadv_is_my_mac(const uint8_t *addr) if (hard_iface->if_status != BATADV_IF_ACTIVE) continue; + if (hard_iface->soft_iface != bat_priv->soft_iface) + continue; + if (batadv_compare_eth(hard_iface->net_dev->dev_addr, addr)) { rcu_read_unlock(); return 1; diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index ced08b936a96..d40910dfc8ea 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -162,7 +162,7 @@ extern struct workqueue_struct *batadv_event_workqueue; int batadv_mesh_init(struct net_device *soft_iface); void batadv_mesh_free(struct net_device *soft_iface); -int batadv_is_my_mac(const uint8_t *addr); +int batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr); struct batadv_hard_iface * batadv_seq_print_text_primary_if_get(struct seq_file *seq); int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev, diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 5ee21cebbbb0..319f2906c71a 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -402,7 +402,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, goto out; /* not for me */ - if (!batadv_is_my_mac(ethhdr->h_dest)) + if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest)) goto out; icmp_packet = (struct batadv_icmp_packet_rr *)skb->data; @@ -416,7 +416,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, } /* packet for me */ - if (batadv_is_my_mac(icmp_packet->dst)) + if (batadv_is_my_mac(bat_priv, icmp_packet->dst)) return batadv_recv_my_icmp_packet(bat_priv, skb, hdr_size); /* TTL exceeded */ @@ -548,7 +548,8 @@ batadv_find_ifalter_router(struct batadv_orig_node *primary_orig, return router; } -static int batadv_check_unicast_packet(struct sk_buff *skb, int hdr_size) +static int batadv_check_unicast_packet(struct batadv_priv *bat_priv, + struct sk_buff *skb, int hdr_size) { struct ethhdr *ethhdr; @@ -567,7 +568,7 @@ static int batadv_check_unicast_packet(struct sk_buff *skb, int hdr_size) return -1; /* not for me */ - if (!batadv_is_my_mac(ethhdr->h_dest)) + if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest)) return -1; return 0; @@ -582,7 +583,7 @@ int batadv_recv_tt_query(struct sk_buff *skb, struct batadv_hard_iface *recv_if) char tt_flag; size_t packet_size; - if (batadv_check_unicast_packet(skb, hdr_size) < 0) + if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0) return NET_RX_DROP; /* I could need to modify it */ @@ -614,7 +615,7 @@ int batadv_recv_tt_query(struct sk_buff *skb, struct batadv_hard_iface *recv_if) case BATADV_TT_RESPONSE: batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_RX); - if (batadv_is_my_mac(tt_query->dst)) { + if (batadv_is_my_mac(bat_priv, tt_query->dst)) { /* packet needs to be linearized to access the TT * changes */ @@ -657,14 +658,15 @@ int batadv_recv_roam_adv(struct sk_buff *skb, struct batadv_hard_iface *recv_if) struct batadv_roam_adv_packet *roam_adv_packet; struct batadv_orig_node *orig_node; - if (batadv_check_unicast_packet(skb, sizeof(*roam_adv_packet)) < 0) + if (batadv_check_unicast_packet(bat_priv, skb, + sizeof(*roam_adv_packet)) < 0) goto out; batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_RX); roam_adv_packet = (struct batadv_roam_adv_packet *)skb->data; - if (!batadv_is_my_mac(roam_adv_packet->dst)) + if (!batadv_is_my_mac(bat_priv, roam_adv_packet->dst)) return batadv_route_unicast_packet(skb, recv_if); /* check if it is a backbone gateway. we don't accept @@ -967,7 +969,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, * last time) the packet had an updated information or not */ curr_ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn); - if (!batadv_is_my_mac(unicast_packet->dest)) { + if (!batadv_is_my_mac(bat_priv, unicast_packet->dest)) { orig_node = batadv_orig_hash_find(bat_priv, unicast_packet->dest); /* if it is not possible to find the orig_node representing the @@ -1044,14 +1046,14 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, if (is4addr) hdr_size = sizeof(*unicast_4addr_packet); - if (batadv_check_unicast_packet(skb, hdr_size) < 0) + if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0) return NET_RX_DROP; if (!batadv_check_unicast_ttvn(bat_priv, skb)) return NET_RX_DROP; /* packet for me */ - if (batadv_is_my_mac(unicast_packet->dest)) { + if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) { if (is4addr) { batadv_dat_inc_counter(bat_priv, unicast_4addr_packet->subtype); @@ -1088,7 +1090,7 @@ int batadv_recv_ucast_frag_packet(struct sk_buff *skb, struct sk_buff *new_skb = NULL; int ret; - if (batadv_check_unicast_packet(skb, hdr_size) < 0) + if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0) return NET_RX_DROP; if (!batadv_check_unicast_ttvn(bat_priv, skb)) @@ -1097,7 +1099,7 @@ int batadv_recv_ucast_frag_packet(struct sk_buff *skb, unicast_packet = (struct batadv_unicast_frag_packet *)skb->data; /* packet for me */ - if (batadv_is_my_mac(unicast_packet->dest)) { + if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) { ret = batadv_frag_reassemble_skb(skb, bat_priv, &new_skb); if (ret == NET_RX_DROP) @@ -1151,13 +1153,13 @@ int batadv_recv_bcast_packet(struct sk_buff *skb, goto out; /* ignore broadcasts sent by myself */ - if (batadv_is_my_mac(ethhdr->h_source)) + if (batadv_is_my_mac(bat_priv, ethhdr->h_source)) goto out; bcast_packet = (struct batadv_bcast_packet *)skb->data; /* ignore broadcasts originated by myself */ - if (batadv_is_my_mac(bcast_packet->orig)) + if (batadv_is_my_mac(bat_priv, bcast_packet->orig)) goto out; if (bcast_packet->header.ttl < 2) @@ -1243,14 +1245,14 @@ int batadv_recv_vis_packet(struct sk_buff *skb, ethhdr = (struct ethhdr *)skb_mac_header(skb); /* not for me */ - if (!batadv_is_my_mac(ethhdr->h_dest)) + if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest)) return NET_RX_DROP; /* ignore own packets */ - if (batadv_is_my_mac(vis_packet->vis_orig)) + if (batadv_is_my_mac(bat_priv, vis_packet->vis_orig)) return NET_RX_DROP; - if (batadv_is_my_mac(vis_packet->sender_orig)) + if (batadv_is_my_mac(bat_priv, vis_packet->sender_orig)) return NET_RX_DROP; switch (vis_packet->vis_type) { diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 98a66a021a60..7abee19567e9 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1953,7 +1953,7 @@ out: bool batadv_send_tt_response(struct batadv_priv *bat_priv, struct batadv_tt_query_packet *tt_request) { - if (batadv_is_my_mac(tt_request->dst)) { + if (batadv_is_my_mac(bat_priv, tt_request->dst)) { /* don't answer backbone gws! */ if (batadv_bla_is_backbone_gw_orig(bat_priv, tt_request->src)) return true; diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index c053244b97bd..6a1e646be96d 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -477,7 +477,7 @@ void batadv_receive_client_update_packet(struct batadv_priv *bat_priv, /* Are we the target for this VIS packet? */ if (vis_server == BATADV_VIS_TYPE_SERVER_SYNC && - batadv_is_my_mac(vis_packet->target_orig)) + batadv_is_my_mac(bat_priv, vis_packet->target_orig)) are_target = 1; spin_lock_bh(&bat_priv->vis.hash_lock); @@ -496,7 +496,7 @@ void batadv_receive_client_update_packet(struct batadv_priv *bat_priv, batadv_send_list_add(bat_priv, info); /* ... we're not the recipient (and thus need to forward). */ - } else if (!batadv_is_my_mac(packet->target_orig)) { + } else if (!batadv_is_my_mac(bat_priv, packet->target_orig)) { batadv_send_list_add(bat_priv, info); } From 355f1ecbcc81efb6611290b772e7ae77ece4059c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 17 Apr 2013 15:58:23 -0700 Subject: [PATCH 250/303] thinkpad-acpi: kill hotkey_thread_mutex hotkey_kthread() does try_to_freeze() under hotkey_thread_mutex. We can simply kill this mutex, hotkey_poll_stop_sync() does not need to serialize with hotkey_kthread(). When kthread_stop() returns the thread is already dead, it called do_exit()->complete_vfork_done(). Reported-by: Artem Savkov Reported-by: Maciej Rutecki Signed-off-by: Oleg Nesterov Acked-by: Henrique de Moraes Holschuh Cc: Matthew Garrett Cc: "Rafael J. Wysocki" Reviewed-by: Mandeep Singh Baines Cc: Aaron Lu Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/platform/x86/thinkpad_acpi.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 9a907567f41e..edec135b1685 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -1964,9 +1964,6 @@ struct tp_nvram_state { /* kthread for the hotkey poller */ static struct task_struct *tpacpi_hotkey_task; -/* Acquired while the poller kthread is running, use to sync start/stop */ -static struct mutex hotkey_thread_mutex; - /* * Acquire mutex to write poller control variables as an * atomic block. @@ -2462,8 +2459,6 @@ static int hotkey_kthread(void *data) unsigned int poll_freq; bool was_frozen; - mutex_lock(&hotkey_thread_mutex); - if (tpacpi_lifecycle == TPACPI_LIFE_EXITING) goto exit; @@ -2523,7 +2518,6 @@ static int hotkey_kthread(void *data) } exit: - mutex_unlock(&hotkey_thread_mutex); return 0; } @@ -2533,9 +2527,6 @@ static void hotkey_poll_stop_sync(void) if (tpacpi_hotkey_task) { kthread_stop(tpacpi_hotkey_task); tpacpi_hotkey_task = NULL; - mutex_lock(&hotkey_thread_mutex); - /* at this point, the thread did exit */ - mutex_unlock(&hotkey_thread_mutex); } } @@ -3234,7 +3225,6 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) mutex_init(&hotkey_mutex); #ifdef CONFIG_THINKPAD_ACPI_HOTKEY_POLL - mutex_init(&hotkey_thread_mutex); mutex_init(&hotkey_thread_data_mutex); #endif From d875cf08391a811703f8adf39db598cac9ece6a1 Mon Sep 17 00:00:00 2001 From: Andrei Epure Date: Wed, 17 Apr 2013 15:58:24 -0700 Subject: [PATCH 251/303] drivers/video/mmp/core.c: fix use-after-free bug Found with coccinelle. Signed-off-by: Andrei Epure Cc: Florian Tobias Schandinat Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/mmp/core.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/video/mmp/core.c b/drivers/video/mmp/core.c index 9ed83419038b..84de2632857a 100644 --- a/drivers/video/mmp/core.c +++ b/drivers/video/mmp/core.c @@ -252,7 +252,5 @@ void mmp_unregister_path(struct mmp_path *path) kfree(path); mutex_unlock(&disp_lock); - - dev_info(path->dev, "de-register %s\n", path->name); } EXPORT_SYMBOL_GPL(mmp_unregister_path); From e942e2c3f7e093fc8756dd8b47c93a821c09429f Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 17 Apr 2013 15:58:26 -0700 Subject: [PATCH 252/303] checkpatch: fix stringification macro defect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix checkpatch misreporting defect with stringification macros ERROR: Macros with complex values should be enclosed in parenthesis #27: FILE: arch/arm/include/asm/kgdb.h:41: +#define ___to_string(X) #X Signed-off-by: Joe Perches Reported-by: Vincent Stehlé Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index b28cc384a5bc..4de4bc48493b 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3016,6 +3016,7 @@ sub process { $dstat !~ /^'X'$/ && # character constants $dstat !~ /$exceptions/ && $dstat !~ /^\.$Ident\s*=/ && # .foo = + $dstat !~ /^(?:\#\s*$Ident|\#\s*$Constant)\s*$/ && # stringification #foo $dstat !~ /^do\s*$Constant\s*while\s*$Constant;?$/ && # do {...} while (...); // do {...} while (...) $dstat !~ /^for\s*$Constant$/ && # for (...) $dstat !~ /^for\s*$Constant\s+(?:$Ident|-?$Constant)$/ && # for (...) bar() From a2fce9143057f4eb7675a21cca1b6beabe585c8b Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 17 Apr 2013 15:58:27 -0700 Subject: [PATCH 253/303] hugetlbfs: stop setting VM_DONTDUMP in initializing vma(VM_HUGETLB) Currently we fail to include any data on hugepages into coredump, because VM_DONTDUMP is set on hugetlbfs's vma. This behavior was recently introduced by commit 314e51b9851b ("mm: kill vma flag VM_RESERVED and mm->reserved_vm counter"). This looks to me a serious regression, so let's fix it. Signed-off-by: Naoya Horiguchi Acked-by: Konstantin Khlebnikov Acked-by: Michal Hocko Reviewed-by: Rik van Riel Acked-by: KOSAKI Motohiro Acked-by: David Rientjes Cc: [3.7+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 84e3d856e91d..523464e62849 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -110,7 +110,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) * way when do_mmap_pgoff unwinds (may be important on powerpc * and ia64). */ - vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND | VM_DONTDUMP; + vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND; vma->vm_ops = &hugetlb_vm_ops; if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT)) From 23d9e482136e31c9d287633a6e473daa172767c4 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 17 Apr 2013 15:58:28 -0700 Subject: [PATCH 254/303] fs/binfmt_elf.c: fix hugetlb memory check in vma_dump_size() Documentation/filesystems/proc.txt says about coredump_filter bitmask, Note bit 0-4 doesn't effect any hugetlb memory. hugetlb memory are only effected by bit 5-6. However current code can go into the subsequent flag checks of bit 0-4 for vma(VM_HUGETLB). So this patch inserts 'return' and makes it work as written in the document. Signed-off-by: Naoya Horiguchi Reviewed-by: Rik van Riel Acked-by: Michal Hocko Reviewed-by: HATAYAMA Daisuke Acked-by: KOSAKI Motohiro Acked-by: David Rientjes Cc: [3.7+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 3939829f6c5c..86af964c2425 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1137,6 +1137,7 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma, goto whole; if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE)) goto whole; + return 0; } /* Do not dump I/O mapped devices or special mappings */ From 9cc3a5bd40067b9a0fbd49199d0780463fc2140f Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 17 Apr 2013 15:58:30 -0700 Subject: [PATCH 255/303] hugetlbfs: add swap entry check in follow_hugetlb_page() With applying the previous patch "hugetlbfs: stop setting VM_DONTDUMP in initializing vma(VM_HUGETLB)" to reenable hugepage coredump, if a memory error happens on a hugepage and the affected processes try to access the error hugepage, we hit VM_BUG_ON(atomic_read(&page->_count) <= 0) in get_page(). The reason for this bug is that coredump-related code doesn't recognise "hugepage hwpoison entry" with which a pmd entry is replaced when a memory error occurs on a hugepage. In other words, physical address information is stored in different bit layout between hugepage hwpoison entry and pmd entry, so follow_hugetlb_page() which is called in get_dump_page() returns a wrong page from a given address. The expected behavior is like this: absent is_swap_pte FOLL_DUMP Expected behavior ------------------------------------------------------------------- true false false hugetlb_fault false true false hugetlb_fault false false false return page true false true skip page (to avoid allocation) false true true hugetlb_fault false false true return page With this patch, we can call hugetlb_fault() and take proper actions (we wait for migration entries, fail with VM_FAULT_HWPOISON_LARGE for hwpoisoned entries,) and as the result we can dump all hugepages except for hwpoisoned ones. Signed-off-by: Naoya Horiguchi Cc: Rik van Riel Acked-by: Michal Hocko Cc: HATAYAMA Daisuke Acked-by: KOSAKI Motohiro Acked-by: David Rientjes Cc: [2.6.34+?] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index ca9a7c6d7e97..1a12f5b9a0ab 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2961,7 +2961,17 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, break; } - if (absent || + /* + * We need call hugetlb_fault for both hugepages under migration + * (in which case hugetlb_fault waits for the migration,) and + * hwpoisoned hugepages (in which case we need to prevent the + * caller from accessing to them.) In order to do this, we use + * here is_swap_pte instead of is_hugetlb_entry_migration and + * is_hugetlb_entry_hwpoisoned. This is because it simply covers + * both cases, and because we can't follow correct pages + * directly from any kind of swap entries. + */ + if (absent || is_swap_pte(huge_ptep_get(pte)) || ((flags & FOLL_WRITE) && !pte_write(huge_ptep_get(pte)))) { int ret; From 472d326677db37625903265428582694394d2df7 Mon Sep 17 00:00:00 2001 From: Josh Wu Date: Wed, 17 Apr 2013 15:58:31 -0700 Subject: [PATCH 256/303] avr32: fix build error in atstk1006_defconfig fixed the following compile error when use avr32 atstk1006_defconfig: drivers/mtd/nand/atmel_nand.c: In function 'pmecc_err_location': drivers/mtd/nand/atmel_nand.c:639: error: implicit declaration of function 'writel_relaxed' which was introduced by commit 1c7b874d33b4 ("mtd: at91: atmel_nand: add Programmable Multibit ECC controller support"). The PMECC for nand flash code uses writel_relaxed(). But in avr32, there is no macro "writel_relaxed" defined. This patch add writex_relaxed macro definitions. Signed-off-by: Josh Wu Acked-by: Havard Skinnemoen Acked-by: Hans-Christian Egtvedt Cc: David Woodhouse Cc: Artem Bityutskiy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/avr32/include/asm/io.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/avr32/include/asm/io.h b/arch/avr32/include/asm/io.h index cf60d0a9f176..fc6483f83ccc 100644 --- a/arch/avr32/include/asm/io.h +++ b/arch/avr32/include/asm/io.h @@ -165,6 +165,10 @@ BUILDIO_IOPORT(l, u32) #define readw_be __raw_readw #define readl_be __raw_readl +#define writeb_relaxed writeb +#define writew_relaxed writew +#define writel_relaxed writel + #define writeb_be __raw_writeb #define writew_be __raw_writew #define writel_be __raw_writel From 12f267a20aecf8b84a2a9069b9011f1661c779b4 Mon Sep 17 00:00:00 2001 From: Vyacheslav Dubeyko Date: Wed, 17 Apr 2013 15:58:33 -0700 Subject: [PATCH 257/303] hfsplus: fix potential overflow in hfsplus_file_truncate() Change a u32 to loff_t hfsplus_file_truncate(). Signed-off-by: Vyacheslav Dubeyko Cc: Christoph Hellwig Cc: Al Viro Cc: Hin-Tak Leung Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hfsplus/extents.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index a94f0f779d5e..fe0a76213d9e 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c @@ -533,7 +533,7 @@ void hfsplus_file_truncate(struct inode *inode) struct address_space *mapping = inode->i_mapping; struct page *page; void *fsdata; - u32 size = inode->i_size; + loff_t size = inode->i_size; res = pagecache_write_begin(NULL, mapping, size, 0, AOP_FLAG_UNINTERRUPTIBLE, From d72515b85a6583db131ec6032978e3c9d4291d95 Mon Sep 17 00:00:00 2001 From: Xishi Qiu Date: Wed, 17 Apr 2013 15:58:34 -0700 Subject: [PATCH 258/303] mm/vmscan: fix error return in kswapd_run() Fix the error return value in kswapd_run(). The bug was introduced by commit d5dc0ad928fb ("mm/vmscan: fix error number for failed kthread"). Signed-off-by: Xishi Qiu Reviewed-by: Wanpeng Li Reviewed-by: Rik van Riel Reported-by: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 88c5fed8b9a4..669fba39be1a 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -3188,9 +3188,9 @@ int kswapd_run(int nid) if (IS_ERR(pgdat->kswapd)) { /* failure at boot is fatal */ BUG_ON(system_state == SYSTEM_BOOTING); - pgdat->kswapd = NULL; pr_err("Failed to start kswapd on node %d\n", nid); ret = PTR_ERR(pgdat->kswapd); + pgdat->kswapd = NULL; } return ret; } From b9e146d8eb3b9ecae5086d373b50fa0c1f3e7f0f Mon Sep 17 00:00:00 2001 From: Emese Revfy Date: Wed, 17 Apr 2013 15:58:36 -0700 Subject: [PATCH 259/303] kernel/signal.c: stop info leak via the tkill and the tgkill syscalls This fixes a kernel memory contents leak via the tkill and tgkill syscalls for compat processes. This is visible in the siginfo_t->_sifields._rt.si_sigval.sival_ptr field when handling signals delivered from tkill. The place of the infoleak: int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) { ... put_user_ex(ptr_to_compat(from->si_ptr), &to->si_ptr); ... } Signed-off-by: Emese Revfy Reviewed-by: PaX Team Signed-off-by: Kees Cook Cc: Al Viro Cc: Oleg Nesterov Cc: "Eric W. Biederman" Cc: Serge Hallyn Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/signal.c b/kernel/signal.c index dd72567767d9..598dc06be421 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2948,7 +2948,7 @@ do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info) static int do_tkill(pid_t tgid, pid_t pid, int sig) { - struct siginfo info; + struct siginfo info = {}; info.si_signo = sig; info.si_errno = 0; From d202f05158442396033f416df376f8ece1f563df Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Thu, 18 Apr 2013 09:52:59 +0200 Subject: [PATCH 260/303] dmaengine: at_hdmac: fix race condition in atc_advance_work() The BUG_ON() directive is triggered probably due to a latency modification following inclusion of commit c10d73671ad3 ("softirq: reduce latencies"). This condition has not been met before 3.9-rc1 and doesn't trigger without this patch. We now make sure that DMA channel is idle before calling atc_complete_all() which makes the BUG_ON() "protection" useless. Signed-off-by: Ludovic Desroches Signed-off-by: Nicolas Ferre Acked-by: Vinod Koul Signed-off-by: Linus Torvalds --- drivers/dma/at_hdmac.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 6e13f262139a..88cfc61329d2 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -310,8 +310,6 @@ static void atc_complete_all(struct at_dma_chan *atchan) dev_vdbg(chan2dev(&atchan->chan_common), "complete all\n"); - BUG_ON(atc_chan_is_enabled(atchan)); - /* * Submit queued descriptors ASAP, i.e. before we go through * the completed ones. @@ -368,6 +366,9 @@ static void atc_advance_work(struct at_dma_chan *atchan) { dev_vdbg(chan2dev(&atchan->chan_common), "advance_work\n"); + if (atc_chan_is_enabled(atchan)) + return; + if (list_empty(&atchan->active_list) || list_is_singular(&atchan->active_list)) { atc_complete_all(atchan); @@ -1078,9 +1079,7 @@ static void atc_issue_pending(struct dma_chan *chan) return; spin_lock_irqsave(&atchan->lock, flags); - if (!atc_chan_is_enabled(atchan)) { - atc_advance_work(atchan); - } + atc_advance_work(atchan); spin_unlock_irqrestore(&atchan->lock, flags); } From 5c51543b0ae45967cfa99ca16748dc72888cc265 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 18 Apr 2013 18:33:18 +0900 Subject: [PATCH 261/303] kprobes: Fix a double lock bug of kprobe_mutex Fix a double locking bug caused when debug.kprobe-optimization=0. While the proc_kprobes_optimization_handler locks kprobe_mutex, wait_for_kprobe_optimizer locks it again and that causes a double lock. To fix the bug, this introduces different mutex for protecting sysctl parameter and locks it in proc_kprobes_optimization_handler. Of course, since we need to lock kprobe_mutex when touching kprobes resources, that is done in *optimize_all_kprobes(). This bug was introduced by commit ad72b3bea744 ("kprobes: fix wait_for_kprobe_optimizer()") Signed-off-by: Masami Hiramatsu Acked-by: Ananth N Mavinakayanahalli Cc: Ingo Molnar Cc: Tejun Heo Cc: "David S. Miller" Signed-off-by: Linus Torvalds --- kernel/kprobes.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index e35be53f6613..3fed7f0cbcdf 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -794,16 +794,16 @@ out: } #ifdef CONFIG_SYSCTL -/* This should be called with kprobe_mutex locked */ static void __kprobes optimize_all_kprobes(void) { struct hlist_head *head; struct kprobe *p; unsigned int i; + mutex_lock(&kprobe_mutex); /* If optimization is already allowed, just return */ if (kprobes_allow_optimization) - return; + goto out; kprobes_allow_optimization = true; for (i = 0; i < KPROBE_TABLE_SIZE; i++) { @@ -813,18 +813,22 @@ static void __kprobes optimize_all_kprobes(void) optimize_kprobe(p); } printk(KERN_INFO "Kprobes globally optimized\n"); +out: + mutex_unlock(&kprobe_mutex); } -/* This should be called with kprobe_mutex locked */ static void __kprobes unoptimize_all_kprobes(void) { struct hlist_head *head; struct kprobe *p; unsigned int i; + mutex_lock(&kprobe_mutex); /* If optimization is already prohibited, just return */ - if (!kprobes_allow_optimization) + if (!kprobes_allow_optimization) { + mutex_unlock(&kprobe_mutex); return; + } kprobes_allow_optimization = false; for (i = 0; i < KPROBE_TABLE_SIZE; i++) { @@ -834,11 +838,14 @@ static void __kprobes unoptimize_all_kprobes(void) unoptimize_kprobe(p, false); } } + mutex_unlock(&kprobe_mutex); + /* Wait for unoptimizing completion */ wait_for_kprobe_optimizer(); printk(KERN_INFO "Kprobes globally unoptimized\n"); } +static DEFINE_MUTEX(kprobe_sysctl_mutex); int sysctl_kprobes_optimization; int proc_kprobes_optimization_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, @@ -846,7 +853,7 @@ int proc_kprobes_optimization_handler(struct ctl_table *table, int write, { int ret; - mutex_lock(&kprobe_mutex); + mutex_lock(&kprobe_sysctl_mutex); sysctl_kprobes_optimization = kprobes_allow_optimization ? 1 : 0; ret = proc_dointvec_minmax(table, write, buffer, length, ppos); @@ -854,7 +861,7 @@ int proc_kprobes_optimization_handler(struct ctl_table *table, int write, optimize_all_kprobes(); else unoptimize_all_kprobes(); - mutex_unlock(&kprobe_mutex); + mutex_unlock(&kprobe_sysctl_mutex); return ret; } From 7eff7ded02d1b15ba8321664839b353fa6c0c1e4 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Thu, 18 Apr 2013 08:44:46 -0700 Subject: [PATCH 262/303] x86, hyperv: Handle Xen emulation of Hyper-V more gracefully Install the Hyper-V specific interrupt handler only when needed. This would permit us to get rid of the Xen check. Note that when the vmbus drivers invokes the call to register its handler, we are sure to be running on Hyper-V. Signed-off-by: K. Y. Srinivasan Link: http://lkml.kernel.org/r/1366299886-6399-1-git-send-email-kys@microsoft.com Acked-by: Michael S. Tsirkin Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mshyperv.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index a7d26d83fb70..8f4be53ea04b 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -35,13 +35,6 @@ static bool __init ms_hyperv_platform(void) if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) return false; - /* - * Xen emulates Hyper-V to support enlightened Windows. - * Check to see first if we are on a Xen Hypervisor. - */ - if (xen_cpuid_base()) - return false; - cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS, &eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]); @@ -82,12 +75,6 @@ static void __init ms_hyperv_init_platform(void) if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE) clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100); -#if IS_ENABLED(CONFIG_HYPERV) - /* - * Setup the IDT for hypervisor callback. - */ - alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector); -#endif } const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { @@ -103,6 +90,11 @@ static irq_handler_t vmbus_isr; void hv_register_vmbus_handler(int irq, irq_handler_t handler) { + /* + * Setup the IDT for hypervisor callback. + */ + alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector); + vmbus_irq = irq; vmbus_isr = handler; } From 0a82a8d132b26d438eb90b3ab35a7016e7227a1d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 18 Apr 2013 09:00:26 -0700 Subject: [PATCH 263/303] Revert "block: add missing block_bio_complete() tracepoint" This reverts commit 3a366e614d0837d9fc23f78cdb1a1186ebc3387f. Wanlong Gao reports that it causes a kernel panic on his machine several minutes after boot. Reverting it removes the panic. Jens says: "It's not quite clear why that is yet, so I think we should just revert the commit for 3.9 final (which I'm assuming is pretty close). The wifi is crap at the LSF hotel, so sending this email instead of queueing up a revert and pull request." Reported-by: Wanlong Gao Requested-by: Jens Axboe Cc: Tejun Heo Cc: Steven Rostedt Signed-off-by: Linus Torvalds --- block/blk-core.c | 1 + drivers/md/dm.c | 1 + drivers/md/raid5.c | 11 ++++++++++- fs/bio.c | 2 -- include/linux/blktrace_api.h | 1 - include/trace/events/block.h | 8 ++++---- kernel/trace/blktrace.c | 26 +++----------------------- 7 files changed, 19 insertions(+), 31 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 074b758efc42..7c288358a745 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -39,6 +39,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); +EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug); DEFINE_IDA(blk_queue_ida); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 7e469260fe5e..9a0bdad9ad8f 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -611,6 +611,7 @@ static void dec_pending(struct dm_io *io, int error) queue_io(md, bio); } else { /* done with normal IO or empty flush */ + trace_block_bio_complete(md->queue, bio, io_error); bio_endio(bio, io_error); } } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 24909eb13fec..f4e87bfc7567 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -184,6 +184,8 @@ static void return_io(struct bio *return_bi) return_bi = bi->bi_next; bi->bi_next = NULL; bi->bi_size = 0; + trace_block_bio_complete(bdev_get_queue(bi->bi_bdev), + bi, 0); bio_endio(bi, 0); bi = return_bi; } @@ -3914,6 +3916,8 @@ static void raid5_align_endio(struct bio *bi, int error) rdev_dec_pending(rdev, conf->mddev); if (!error && uptodate) { + trace_block_bio_complete(bdev_get_queue(raid_bi->bi_bdev), + raid_bi, 0); bio_endio(raid_bi, 0); if (atomic_dec_and_test(&conf->active_aligned_reads)) wake_up(&conf->wait_for_stripe); @@ -4382,6 +4386,8 @@ static void make_request(struct mddev *mddev, struct bio * bi) if ( rw == WRITE ) md_write_end(mddev); + trace_block_bio_complete(bdev_get_queue(bi->bi_bdev), + bi, 0); bio_endio(bi, 0); } } @@ -4758,8 +4764,11 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio) handled++; } remaining = raid5_dec_bi_active_stripes(raid_bio); - if (remaining == 0) + if (remaining == 0) { + trace_block_bio_complete(bdev_get_queue(raid_bio->bi_bdev), + raid_bio, 0); bio_endio(raid_bio, 0); + } if (atomic_dec_and_test(&conf->active_aligned_reads)) wake_up(&conf->wait_for_stripe); return handled; diff --git a/fs/bio.c b/fs/bio.c index bb5768f59b32..b96fc6ce4855 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -1428,8 +1428,6 @@ void bio_endio(struct bio *bio, int error) else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) error = -EIO; - trace_block_bio_complete(bio, error); - if (bio->bi_end_io) bio->bi_end_io(bio, error); } diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 0ea61e07a91c..7c2e030e72f1 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -12,7 +12,6 @@ struct blk_trace { int trace_state; - bool rq_based; struct rchan *rchan; unsigned long __percpu *sequence; unsigned char __percpu *msg_data; diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 9961726523d0..9c1467357b03 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -257,6 +257,7 @@ TRACE_EVENT(block_bio_bounce, /** * block_bio_complete - completed all work on the block operation + * @q: queue holding the block operation * @bio: block operation completed * @error: io error value * @@ -265,9 +266,9 @@ TRACE_EVENT(block_bio_bounce, */ TRACE_EVENT(block_bio_complete, - TP_PROTO(struct bio *bio, int error), + TP_PROTO(struct request_queue *q, struct bio *bio, int error), - TP_ARGS(bio, error), + TP_ARGS(q, bio, error), TP_STRUCT__entry( __field( dev_t, dev ) @@ -278,8 +279,7 @@ TRACE_EVENT(block_bio_complete, ), TP_fast_assign( - __entry->dev = bio->bi_bdev ? - bio->bi_bdev->bd_dev : 0; + __entry->dev = bio->bi_bdev->bd_dev; __entry->sector = bio->bi_sector; __entry->nr_sector = bio->bi_size >> 9; __entry->error = error; diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 9e5b8c272eec..5a0f781cd729 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -739,12 +739,6 @@ static void blk_add_trace_rq_complete(void *ignore, struct request_queue *q, struct request *rq) { - struct blk_trace *bt = q->blk_trace; - - /* if control ever passes through here, it's a request based driver */ - if (unlikely(bt && !bt->rq_based)) - bt->rq_based = true; - blk_add_trace_rq(q, rq, BLK_TA_COMPLETE); } @@ -780,24 +774,10 @@ static void blk_add_trace_bio_bounce(void *ignore, blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0); } -static void blk_add_trace_bio_complete(void *ignore, struct bio *bio, int error) +static void blk_add_trace_bio_complete(void *ignore, + struct request_queue *q, struct bio *bio, + int error) { - struct request_queue *q; - struct blk_trace *bt; - - if (!bio->bi_bdev) - return; - - q = bdev_get_queue(bio->bi_bdev); - bt = q->blk_trace; - - /* - * Request based drivers will generate both rq and bio completions. - * Ignore bio ones. - */ - if (likely(!bt) || bt->rq_based) - return; - blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error); } From 73053d973dd6f56472309cffa5a5d15a62dd6f96 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 17 Apr 2013 10:46:52 -0500 Subject: [PATCH 264/303] ARM: highbank: fix cache flush ordering for cpu hotplug The L1 data cache flush needs to be after highbank_set_cpu_jump call which pollutes the cache with the l2x0_lock. This causes other cores to deadlock waiting for the l2x0_lock. Moving the flush of the entire data cache after highbank_set_cpu_jump fixes the problem. Use flush_cache_louis instead of flush_cache_all are that is sufficient to flush only the L1 data cache. flush_cache_louis did not exist when highbank_cpu_die was originally written. With PL310 errata 769419 enabled, a wmb is inserted into idle which takes the l2x0_lock. This makes the problem much more easily hit and causes reset to hang. Reported-by: Paolo Pisati Signed-off-by: Rob Herring Signed-off-by: Olof Johansson --- arch/arm/mach-highbank/hotplug.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/arm/mach-highbank/hotplug.c b/arch/arm/mach-highbank/hotplug.c index f30c52843396..890cae23c12a 100644 --- a/arch/arm/mach-highbank/hotplug.c +++ b/arch/arm/mach-highbank/hotplug.c @@ -28,13 +28,11 @@ extern void secondary_startup(void); */ void __ref highbank_cpu_die(unsigned int cpu) { - flush_cache_all(); - highbank_set_cpu_jump(cpu, phys_to_virt(0)); + + flush_cache_louis(); highbank_set_core_pwr(); - cpu_do_idle(); - - /* We should never return from idle */ - panic("highbank: cpu %d unexpectedly exit from shutdown\n", cpu); + while (1) + cpu_do_idle(); } From f9d40f6a9921cc7d9385f64362314054e22152bd Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 17 Apr 2013 20:41:04 +0000 Subject: [PATCH 265/303] igb: Revert support for build_skb in igb This patch actually reverts: igb: Support using build_skb in the case that jumbo frames are disabled The reason for reverting this patch is that it can lead to data corruption. The following flow was pointed out by Ben Hutchings: 1. skb is forwarded to another device 2. Packet headers are modified and it's put into a queue 3. Second packet is received into the other half of this page 4. Page cannot be reused, so is DMA-unmapped 5. The DMA mapping was non-coherent, so unmap copies or invalidates cache The headers added in step 2 get trashed in step 5. Reported-by: Ben Hutchings Signed-off-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/igb/igb.h | 8 -- drivers/net/ethernet/intel/igb/igb_main.c | 110 +--------------------- 2 files changed, 4 insertions(+), 114 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index 25151401c2ab..ab577a763a20 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -284,18 +284,10 @@ struct igb_q_vector { enum e1000_ring_flags_t { IGB_RING_FLAG_RX_SCTP_CSUM, IGB_RING_FLAG_RX_LB_VLAN_BSWAP, - IGB_RING_FLAG_RX_BUILD_SKB_ENABLED, IGB_RING_FLAG_TX_CTX_IDX, IGB_RING_FLAG_TX_DETECT_HANG }; -#define ring_uses_build_skb(ring) \ - test_bit(IGB_RING_FLAG_RX_BUILD_SKB_ENABLED, &(ring)->flags) -#define set_ring_build_skb_enabled(ring) \ - set_bit(IGB_RING_FLAG_RX_BUILD_SKB_ENABLED, &(ring)->flags) -#define clear_ring_build_skb_enabled(ring) \ - clear_bit(IGB_RING_FLAG_RX_BUILD_SKB_ENABLED, &(ring)->flags) - #define IGB_TXD_DCMD (E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS) #define IGB_RX_DESC(R, i) \ diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 8496adfc6a68..64f75291e3a5 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -3350,20 +3350,6 @@ void igb_configure_rx_ring(struct igb_adapter *adapter, wr32(E1000_RXDCTL(reg_idx), rxdctl); } -static void igb_set_rx_buffer_len(struct igb_adapter *adapter, - struct igb_ring *rx_ring) -{ -#define IGB_MAX_BUILD_SKB_SIZE \ - (SKB_WITH_OVERHEAD(IGB_RX_BUFSZ) - \ - (NET_SKB_PAD + NET_IP_ALIGN + IGB_TS_HDR_LEN)) - - /* set build_skb flag */ - if (adapter->max_frame_size <= IGB_MAX_BUILD_SKB_SIZE) - set_ring_build_skb_enabled(rx_ring); - else - clear_ring_build_skb_enabled(rx_ring); -} - /** * igb_configure_rx - Configure receive Unit after Reset * @adapter: board private structure @@ -3383,11 +3369,8 @@ static void igb_configure_rx(struct igb_adapter *adapter) /* Setup the HW Rx Head and Tail Descriptor Pointers and * the Base and Length of the Rx Descriptor Ring */ - for (i = 0; i < adapter->num_rx_queues; i++) { - struct igb_ring *rx_ring = adapter->rx_ring[i]; - igb_set_rx_buffer_len(adapter, rx_ring); - igb_configure_rx_ring(adapter, rx_ring); - } + for (i = 0; i < adapter->num_rx_queues; i++) + igb_configure_rx_ring(adapter, adapter->rx_ring[i]); } /** @@ -6203,78 +6186,6 @@ static bool igb_add_rx_frag(struct igb_ring *rx_ring, return igb_can_reuse_rx_page(rx_buffer, page, truesize); } -static struct sk_buff *igb_build_rx_buffer(struct igb_ring *rx_ring, - union e1000_adv_rx_desc *rx_desc) -{ - struct igb_rx_buffer *rx_buffer; - struct sk_buff *skb; - struct page *page; - void *page_addr; - unsigned int size = le16_to_cpu(rx_desc->wb.upper.length); -#if (PAGE_SIZE < 8192) - unsigned int truesize = IGB_RX_BUFSZ; -#else - unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + - SKB_DATA_ALIGN(NET_SKB_PAD + - NET_IP_ALIGN + - size); -#endif - - /* If we spanned a buffer we have a huge mess so test for it */ - BUG_ON(unlikely(!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP))); - - rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; - page = rx_buffer->page; - prefetchw(page); - - page_addr = page_address(page) + rx_buffer->page_offset; - - /* prefetch first cache line of first page */ - prefetch(page_addr + NET_SKB_PAD + NET_IP_ALIGN); -#if L1_CACHE_BYTES < 128 - prefetch(page_addr + L1_CACHE_BYTES + NET_SKB_PAD + NET_IP_ALIGN); -#endif - - /* build an skb to around the page buffer */ - skb = build_skb(page_addr, truesize); - if (unlikely(!skb)) { - rx_ring->rx_stats.alloc_failed++; - return NULL; - } - - /* we are reusing so sync this buffer for CPU use */ - dma_sync_single_range_for_cpu(rx_ring->dev, - rx_buffer->dma, - rx_buffer->page_offset, - IGB_RX_BUFSZ, - DMA_FROM_DEVICE); - - /* update pointers within the skb to store the data */ - skb_reserve(skb, NET_IP_ALIGN + NET_SKB_PAD); - __skb_put(skb, size); - - /* pull timestamp out of packet data */ - if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { - igb_ptp_rx_pktstamp(rx_ring->q_vector, skb->data, skb); - __skb_pull(skb, IGB_TS_HDR_LEN); - } - - if (igb_can_reuse_rx_page(rx_buffer, page, truesize)) { - /* hand second half of page back to the ring */ - igb_reuse_rx_page(rx_ring, rx_buffer); - } else { - /* we are not reusing the buffer so unmap it */ - dma_unmap_page(rx_ring->dev, rx_buffer->dma, - PAGE_SIZE, DMA_FROM_DEVICE); - } - - /* clear contents of buffer_info */ - rx_buffer->dma = 0; - rx_buffer->page = NULL; - - return skb; -} - static struct sk_buff *igb_fetch_rx_buffer(struct igb_ring *rx_ring, union e1000_adv_rx_desc *rx_desc, struct sk_buff *skb) @@ -6690,10 +6601,7 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) rmb(); /* retrieve a buffer from the ring */ - if (ring_uses_build_skb(rx_ring)) - skb = igb_build_rx_buffer(rx_ring, rx_desc); - else - skb = igb_fetch_rx_buffer(rx_ring, rx_desc, skb); + skb = igb_fetch_rx_buffer(rx_ring, rx_desc, skb); /* exit if we failed to retrieve a buffer */ if (!skb) @@ -6780,14 +6688,6 @@ static bool igb_alloc_mapped_page(struct igb_ring *rx_ring, return true; } -static inline unsigned int igb_rx_offset(struct igb_ring *rx_ring) -{ - if (ring_uses_build_skb(rx_ring)) - return NET_SKB_PAD + NET_IP_ALIGN; - else - return 0; -} - /** * igb_alloc_rx_buffers - Replace used receive buffers; packet split * @adapter: address of board private structure @@ -6814,9 +6714,7 @@ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count) * Refresh the desc even if buffer_addrs didn't change * because each write-back erases this info. */ - rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + - bi->page_offset + - igb_rx_offset(rx_ring)); + rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); rx_desc++; bi++; From 026ac677414c62550269d53907ea47412ec2bd81 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 17 Apr 2013 20:41:35 +0000 Subject: [PATCH 266/303] ixgbe: Fix a bug in setting VF VLAN via PF The PF driver does not check if the administrator has already set a VF VLAN via the PF driver before setting the new VLAN. This results in the following scenario: A) Administrator sets VF to VLAN 100 B) Administrator sets VF to VLAN 100 C) Administrator sets VF to VLAN 200 D) The VF driver continues to be able to receive traffic on VLAN 100 because the VLVFB pool enable bit for that VF was left set instead of being cleared as it should be. This fix ensures that the old VLAN filter for VF is first removed and the pool bit enable for VF is cleared so that it no longer receives traffic on VLAN 100. Signed-off-by: Greg Rose Tested-by: Sibai Li Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index d44b4d21268c..97e33669c0b9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -1049,6 +1049,12 @@ int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos) if ((vf >= adapter->num_vfs) || (vlan > 4095) || (qos > 7)) return -EINVAL; if (vlan || qos) { + if (adapter->vfinfo[vf].pf_vlan) + err = ixgbe_set_vf_vlan(adapter, false, + adapter->vfinfo[vf].pf_vlan, + vf); + if (err) + goto out; err = ixgbe_set_vf_vlan(adapter, true, vlan, vf); if (err) goto out; From a4325ea242d9d0889cb7e86811fef3ff3d714d29 Mon Sep 17 00:00:00 2001 From: Jitendra Kalsaria Date: Thu, 18 Apr 2013 08:10:04 +0000 Subject: [PATCH 267/303] qlcnic: Stop traffic before performing loopback test Before conducting loopback test by sending packets, driver should stop transmit queue and turn off carrier. Signed-off-by: Jitendra Kalsaria Signed-off-by: Shahed Shaikh Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index cd5ae8813cb3..284d6288888c 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -1500,6 +1500,12 @@ int qlcnic_83xx_loopback_test(struct net_device *netdev, u8 mode) } } while ((adapter->ahw->linkup && ahw->has_link_events) != 1); + /* Make sure carrier is off and queue is stopped during loopback */ + if (netif_running(netdev)) { + netif_carrier_off(netdev); + netif_stop_queue(netdev); + } + ret = qlcnic_do_lb_test(adapter, mode); qlcnic_83xx_clear_lb_mode(adapter, mode); From 3eead213fe9828b511faf74a7ed566b56b2b3bcc Mon Sep 17 00:00:00 2001 From: Sritej Velaga Date: Thu, 18 Apr 2013 08:10:05 +0000 Subject: [PATCH 268/303] qlcnic: fix TSO race condition When driver receives a packet with gso size > 0 and when TSO is disabled, it should be transmitted as a TSO packet to prevent Tx timeout and subsequent firmware reset. Signed-off-by: Sritej Velaga Signed-off-by: Shahed Shaikh Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c index 0e630061bff3..5fa847fe388a 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c @@ -358,8 +358,7 @@ set_flags: memcpy(&first_desc->eth_addr, skb->data, ETH_ALEN); } opcode = TX_ETHER_PKT; - if ((adapter->netdev->features & (NETIF_F_TSO | NETIF_F_TSO6)) && - skb_shinfo(skb)->gso_size > 0) { + if (skb_is_gso(skb)) { hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); first_desc->mss = cpu_to_le16(skb_shinfo(skb)->gso_size); first_desc->total_hdr_length = hdr_len; From 7bc27a8cd4db2e6ed71d29b5ceae5b4ee873c9af Mon Sep 17 00:00:00 2001 From: Shahed Shaikh Date: Thu, 18 Apr 2013 08:10:06 +0000 Subject: [PATCH 269/303] qlcnic: Fix typo in logs o Debug logs were not matching with code functionality. o Changed dev_info to netdev_err Signed-off-by: Shahed Shaikh Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index 284d6288888c..edd63f1230f3 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -2786,6 +2786,7 @@ static u64 *qlcnic_83xx_fill_stats(struct qlcnic_adapter *adapter, void qlcnic_83xx_get_stats(struct qlcnic_adapter *adapter, u64 *data) { struct qlcnic_cmd_args cmd; + struct net_device *netdev = adapter->netdev; int ret = 0; qlcnic_alloc_mbx_args(&cmd, adapter, QLCNIC_CMD_GET_STATISTICS); @@ -2795,7 +2796,7 @@ void qlcnic_83xx_get_stats(struct qlcnic_adapter *adapter, u64 *data) data = qlcnic_83xx_fill_stats(adapter, &cmd, data, QLC_83XX_STAT_TX, &ret); if (ret) { - dev_info(&adapter->pdev->dev, "Error getting MAC stats\n"); + netdev_err(netdev, "Error getting Tx stats\n"); goto out; } /* Get MAC stats */ @@ -2805,8 +2806,7 @@ void qlcnic_83xx_get_stats(struct qlcnic_adapter *adapter, u64 *data) data = qlcnic_83xx_fill_stats(adapter, &cmd, data, QLC_83XX_STAT_MAC, &ret); if (ret) { - dev_info(&adapter->pdev->dev, - "Error getting Rx stats\n"); + netdev_err(netdev, "Error getting MAC stats\n"); goto out; } /* Get Rx stats */ @@ -2816,8 +2816,7 @@ void qlcnic_83xx_get_stats(struct qlcnic_adapter *adapter, u64 *data) data = qlcnic_83xx_fill_stats(adapter, &cmd, data, QLC_83XX_STAT_RX, &ret); if (ret) - dev_info(&adapter->pdev->dev, - "Error getting Tx stats\n"); + netdev_err(netdev, "Error getting Rx stats\n"); out: qlcnic_free_mbx_args(&cmd); } From d46f7c4df342c27cea676939d7c005f1e86173e9 Mon Sep 17 00:00:00 2001 From: Dmitry Kravkov Date: Wed, 17 Apr 2013 22:49:05 +0000 Subject: [PATCH 270/303] bnx2x: Prevent UNDI FW illegal host access When loading after UNDI (e.g., Boot from SAN) the UNDI does not gracefully yield its resources; The bnx2x driver handles that release itself. During the manipulation required to release those resources, it's possible for the UNDI to try and write to memory regions which are no longer accessible, causing the PCI bus to prevent further writes from the chip. This would in turn cause DMAE timeouts later on in the driver, as the driver will be unable to use the chip's DMA engines. This patch prevents the chip from actually writing through the PCI bus in said scenario, thus allowing the release without the unfortunate by-product. Signed-off-by: Dmitry Kravkov Signed-off-by: Yuval Mintz Signed-off-by: Ariel Elior Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 8e58da909f5c..c50696b396f1 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -9878,6 +9878,10 @@ static int bnx2x_prev_unload_common(struct bnx2x *bp) REG_RD(bp, NIG_REG_NIG_INT_STS_CLR_0); } } + if (!CHIP_IS_E1x(bp)) + /* block FW from writing to host */ + REG_WR(bp, PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER, 0); + /* wait until BRB is empty */ tmp_reg = REG_RD(bp, BRB1_REG_NUM_OF_FULL_BLOCKS); while (timer_count) { From 0c14e5ced264620284bd96f888614768c9bd0976 Mon Sep 17 00:00:00 2001 From: Ariel Elior Date: Wed, 17 Apr 2013 22:49:06 +0000 Subject: [PATCH 271/303] bnx2x: Fix status blocks configuration This fixes 2 issues regarding bnx2x's status blocks: 1. ethtool -c caused corruption of status blocks in FW RAM. 2. when using multi-CoS, the configuration of the timeout values of status blocks is incorrect, harming the coalescing of interrupts for such CoSs. Signed-off-by: Ariel Elior Signed-off-by: Yuval Mintz Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 4046f97378c2..57619dd4a92b 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -2614,6 +2614,9 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode) } } + /* initialize FW coalescing state machines in RAM */ + bnx2x_update_coalesce(bp); + /* setup the leading queue */ rc = bnx2x_setup_leading(bp); if (rc) { @@ -4580,11 +4583,11 @@ static void storm_memset_hc_disable(struct bnx2x *bp, u8 port, u32 enable_flag = disable ? 0 : (1 << HC_INDEX_DATA_HC_ENABLED_SHIFT); u32 addr = BAR_CSTRORM_INTMEM + CSTORM_STATUS_BLOCK_DATA_FLAGS_OFFSET(fw_sb_id, sb_index); - u16 flags = REG_RD16(bp, addr); + u8 flags = REG_RD8(bp, addr); /* clear and set */ flags &= ~HC_INDEX_DATA_HC_ENABLED; flags |= enable_flag; - REG_WR16(bp, addr, flags); + REG_WR8(bp, addr, flags); DP(NETIF_MSG_IFUP, "port %x fw_sb_id %d sb_index %d disable %d\n", port, fw_sb_id, sb_index, disable); From 4394542ca4ec9f28c3c8405063d200b1e7c347d7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Apr 2013 17:03:24 +0000 Subject: [PATCH 272/303] bonding: fix l23 and l34 load balancing in forwarding path Since commit 6b923cb7188d46 (bonding: support for IPv6 transmit hashing) bonding doesn't properly hash traffic in forwarding setups. Vitaly V. Bursov diagnosed that skb_network_header_len() returned 0 in this case. More generally, the transport header might not be in the skb head. Use pskb_may_pull() & skb_header_pointer() to get it right, and use proto_ports_offset() in bond_xmit_hash_policy_l34() to get support for more protocols than TCP and UDP. Reported-by: Vitaly V. Bursov Signed-off-by: Eric Dumazet Cc: Jay Vosburgh Cc: Andy Gospodarek Cc: John Eaglesham Tested-by: Vitaly V. Bursov Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 55 ++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index a61a760484f7..dea8ce20fea4 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3296,20 +3296,22 @@ static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count) */ static int bond_xmit_hash_policy_l23(struct sk_buff *skb, int count) { - struct ethhdr *data = (struct ethhdr *)skb->data; - struct iphdr *iph; - struct ipv6hdr *ipv6h; + const struct ethhdr *data; + const struct iphdr *iph; + const struct ipv6hdr *ipv6h; u32 v6hash; - __be32 *s, *d; + const __be32 *s, *d; if (skb->protocol == htons(ETH_P_IP) && - skb_network_header_len(skb) >= sizeof(*iph)) { + pskb_network_may_pull(skb, sizeof(*iph))) { iph = ip_hdr(skb); + data = (struct ethhdr *)skb->data; return ((ntohl(iph->saddr ^ iph->daddr) & 0xffff) ^ (data->h_dest[5] ^ data->h_source[5])) % count; } else if (skb->protocol == htons(ETH_P_IPV6) && - skb_network_header_len(skb) >= sizeof(*ipv6h)) { + pskb_network_may_pull(skb, sizeof(*ipv6h))) { ipv6h = ipv6_hdr(skb); + data = (struct ethhdr *)skb->data; s = &ipv6h->saddr.s6_addr32[0]; d = &ipv6h->daddr.s6_addr32[0]; v6hash = (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]); @@ -3328,33 +3330,36 @@ static int bond_xmit_hash_policy_l23(struct sk_buff *skb, int count) static int bond_xmit_hash_policy_l34(struct sk_buff *skb, int count) { u32 layer4_xor = 0; - struct iphdr *iph; - struct ipv6hdr *ipv6h; - __be32 *s, *d; - __be16 *layer4hdr; + const struct iphdr *iph; + const struct ipv6hdr *ipv6h; + const __be32 *s, *d; + const __be16 *l4 = NULL; + __be16 _l4[2]; + int noff = skb_network_offset(skb); + int poff; if (skb->protocol == htons(ETH_P_IP) && - skb_network_header_len(skb) >= sizeof(*iph)) { + pskb_may_pull(skb, noff + sizeof(*iph))) { iph = ip_hdr(skb); - if (!ip_is_fragment(iph) && - (iph->protocol == IPPROTO_TCP || - iph->protocol == IPPROTO_UDP) && - (skb_headlen(skb) - skb_network_offset(skb) >= - iph->ihl * sizeof(u32) + sizeof(*layer4hdr) * 2)) { - layer4hdr = (__be16 *)((u32 *)iph + iph->ihl); - layer4_xor = ntohs(*layer4hdr ^ *(layer4hdr + 1)); + poff = proto_ports_offset(iph->protocol); + + if (!ip_is_fragment(iph) && poff >= 0) { + l4 = skb_header_pointer(skb, noff + (iph->ihl << 2) + poff, + sizeof(_l4), &_l4); + if (l4) + layer4_xor = ntohs(l4[0] ^ l4[1]); } return (layer4_xor ^ ((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count; } else if (skb->protocol == htons(ETH_P_IPV6) && - skb_network_header_len(skb) >= sizeof(*ipv6h)) { + pskb_may_pull(skb, noff + sizeof(*ipv6h))) { ipv6h = ipv6_hdr(skb); - if ((ipv6h->nexthdr == IPPROTO_TCP || - ipv6h->nexthdr == IPPROTO_UDP) && - (skb_headlen(skb) - skb_network_offset(skb) >= - sizeof(*ipv6h) + sizeof(*layer4hdr) * 2)) { - layer4hdr = (__be16 *)(ipv6h + 1); - layer4_xor = ntohs(*layer4hdr ^ *(layer4hdr + 1)); + poff = proto_ports_offset(ipv6h->nexthdr); + if (poff >= 0) { + l4 = skb_header_pointer(skb, noff + sizeof(*ipv6h) + poff, + sizeof(_l4), &_l4); + if (l4) + layer4_xor = ntohs(l4[0] ^ l4[1]); } s = &ipv6h->saddr.s6_addr32[0]; d = &ipv6h->daddr.s6_addr32[0]; From 5add189a125e6b497e31bffdaaed8145ec6d4984 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 13 Apr 2013 14:30:54 +0200 Subject: [PATCH 273/303] netfilter: ipset: bitmap:ip,mac: fix listing with timeout The type when timeout support was enabled, could not list all elements, just the first ones which could fit into one netlink message: it just did not continue listing after the first message. Reported-by: Yoann JUET Signed-off-by: Jozsef Kadlecsik Tested-by: Yoann JUET Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 0f92dc24cb89..d7df6ac2c6f1 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -339,7 +339,11 @@ bitmap_ipmac_tlist(const struct ip_set *set, nla_put_failure: nla_nest_cancel(skb, nested); ipset_nest_end(skb, atd); - return -EMSGSIZE; + if (unlikely(id == first)) { + cb->args[2] = 0; + return -EMSGSIZE; + } + return 0; } static int From c857b7f45b1f9fb7bad1eaa42b8ad68327738232 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Thu, 18 Apr 2013 14:53:10 -0700 Subject: [PATCH 274/303] Revert "hp-wmi: Add support for SMBus hotkeys" This reverts commit fabf85e3ca15d5b94058f391dac8df870cdd427a which breaks hotkey support on some other HP laptops. We'll try doing this differently in 3.10. Signed-off-by: Matthew Garrett --- drivers/platform/x86/hp-wmi.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index 45cacf79f3a7..1a779bbfb87d 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -134,7 +134,6 @@ static const struct key_entry hp_wmi_keymap[] = { { KE_KEY, 0x2142, { KEY_MEDIA } }, { KE_KEY, 0x213b, { KEY_INFO } }, { KE_KEY, 0x2169, { KEY_DIRECTION } }, - { KE_KEY, 0x216a, { KEY_SETUP } }, { KE_KEY, 0x231b, { KEY_HELP } }, { KE_END, 0 } }; @@ -925,9 +924,6 @@ static int __init hp_wmi_init(void) err = hp_wmi_input_setup(); if (err) return err; - - //Enable magic for hotkeys that run on the SMBus - ec_write(0xe6,0x6e); } if (bios_capable) { From f83a7ea2075ca896f2dbf07672bac9cf3682ff74 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 17 Apr 2013 22:45:24 +0000 Subject: [PATCH 275/303] netfilter: xt_rpfilter: skip locally generated broadcast/multicast, too Alex Efros reported rpfilter module doesn't match following packets: IN=br.qemu SRC=192.168.2.1 DST=192.168.2.255 [ .. ] (netfilter bugzilla #814). Problem is that network stack arranges for the locally generated broadcasts to appear on the interface they were sent out, so the IFF_LOOPBACK check doesn't trigger. As -m rpfilter is restricted to PREROUTING, we can check for existing rtable instead, it catches locally-generated broad/multicast case, too. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_rpfilter.c | 8 +++++++- net/ipv6/netfilter/ip6t_rpfilter.c | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index c30130062cd6..c49dcd0284a0 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -66,6 +66,12 @@ static bool rpfilter_lookup_reverse(struct flowi4 *fl4, return dev_match; } +static bool rpfilter_is_local(const struct sk_buff *skb) +{ + const struct rtable *rt = skb_rtable(skb); + return rt && (rt->rt_flags & RTCF_LOCAL); +} + static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_rpfilter_info *info; @@ -76,7 +82,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) info = par->matchinfo; invert = info->flags & XT_RPFILTER_INVERT; - if (par->in->flags & IFF_LOOPBACK) + if (rpfilter_is_local(skb)) return true ^ invert; iph = ip_hdr(skb); diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index 5060d54199ab..e0983f3648a6 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -71,6 +71,12 @@ static bool rpfilter_lookup_reverse6(const struct sk_buff *skb, return ret; } +static bool rpfilter_is_local(const struct sk_buff *skb) +{ + const struct rt6_info *rt = (const void *) skb_dst(skb); + return rt && (rt->rt6i_flags & RTF_LOCAL); +} + static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_rpfilter_info *info = par->matchinfo; @@ -78,7 +84,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) struct ipv6hdr *iph; bool invert = info->flags & XT_RPFILTER_INVERT; - if (par->in->flags & IFF_LOOPBACK) + if (rpfilter_is_local(skb)) return true ^ invert; iph = ipv6_hdr(skb); From 1582eea2081d54b6f9775e8fddc943b7e8e953a6 Mon Sep 17 00:00:00 2001 From: Ping Cheng Date: Tue, 16 Apr 2013 16:09:33 -0700 Subject: [PATCH 276/303] Input: wacom - DTH2242 Grip Pen id was off by one bit Signed-off-by: Ping Cheng Signed-off-by: Dmitry Torokhov --- drivers/input/tablet/wacom_wac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/tablet/wacom_wac.c b/drivers/input/tablet/wacom_wac.c index 49fdff02b5fe..9ec24df36506 100644 --- a/drivers/input/tablet/wacom_wac.c +++ b/drivers/input/tablet/wacom_wac.c @@ -359,7 +359,7 @@ static int wacom_intuos_inout(struct wacom_wac *wacom) case 0x802: /* Intuos4 General Pen */ case 0x804: /* Intuos4 Marker Pen */ case 0x40802: /* Intuos4 Classic Pen */ - case 0x18803: /* DTH2242 Grip Pen */ + case 0x18802: /* DTH2242 Grip Pen */ case 0x022: wacom->tool[idx] = BTN_TOOL_PEN; break; From 5846115b30f3a881e542c8bfde59a699c1c13740 Mon Sep 17 00:00:00 2001 From: Ping Cheng Date: Tue, 16 Apr 2013 16:10:08 -0700 Subject: [PATCH 277/303] Input: wacom - fix "can not retrieve extra class descriptor" for DTH2242 Same as Cintiq 24HDT, DTH2242 has two interfaces sharing one configuration. This patch ignores the second interface. Signed-off-by: Ping Cheng Signed-off-by: Dmitry Torokhov --- drivers/input/tablet/wacom_wac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/tablet/wacom_wac.c b/drivers/input/tablet/wacom_wac.c index 9ec24df36506..0bfd8cf25200 100644 --- a/drivers/input/tablet/wacom_wac.c +++ b/drivers/input/tablet/wacom_wac.c @@ -2144,7 +2144,7 @@ const struct usb_device_id wacom_ids[] = { { USB_DEVICE_WACOM(0x44) }, { USB_DEVICE_WACOM(0x45) }, { USB_DEVICE_WACOM(0x59) }, - { USB_DEVICE_WACOM(0x5D) }, + { USB_DEVICE_DETAILED(0x5D, USB_CLASS_HID, 0, 0) }, { USB_DEVICE_WACOM(0xB0) }, { USB_DEVICE_WACOM(0xB1) }, { USB_DEVICE_WACOM(0xB2) }, From 2323036dfec8ce3ce6e1c86a49a31b039f3300d1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 19 Apr 2013 09:46:39 -0700 Subject: [PATCH 278/303] vm: convert HPET mmap to vm_iomap_memory() helper This is my example conversion of a few existing mmap users. The HPET case is simple, widely available, and easy to test (Clemens Ladisch sent a trivial test-program for it). Test-program-by: Clemens Ladisch Signed-off-by: Linus Torvalds --- drivers/char/hpet.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index e3f9a99b8522..d784650d14f0 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c @@ -373,26 +373,14 @@ static int hpet_mmap(struct file *file, struct vm_area_struct *vma) struct hpet_dev *devp; unsigned long addr; - if (((vma->vm_end - vma->vm_start) != PAGE_SIZE) || vma->vm_pgoff) - return -EINVAL; - devp = file->private_data; addr = devp->hd_hpets->hp_hpet_phys; if (addr & (PAGE_SIZE - 1)) return -ENOSYS; - vma->vm_flags |= VM_IO; vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - - if (io_remap_pfn_range(vma, vma->vm_start, addr >> PAGE_SHIFT, - PAGE_SIZE, vma->vm_page_prot)) { - printk(KERN_ERR "%s: io_remap_pfn_range failed\n", - __func__); - return -EAGAIN; - } - - return 0; + return vm_iomap_memory(vma, addr, PAGE_SIZE); #else return -ENOSYS; #endif From 8558e4a26b00225efeb085725bc319f91201b239 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 19 Apr 2013 09:53:07 -0700 Subject: [PATCH 279/303] vm: convert mtdchar mmap to vm_iomap_memory() helper This is my example conversion of a few existing mmap users. The mtdchar case is actually disabled right now (and stays disabled), but I did it because it showed up on my "git grep", and I was familiar with the code due to fixing an overflow problem in the code in commit 9c603e53d380 ("mtdchar: fix offset overflow detection"). Signed-off-by: Linus Torvalds --- drivers/mtd/mtdchar.c | 32 ++------------------------------ 1 file changed, 2 insertions(+), 30 deletions(-) diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index 92ab30ab00dc..61a1b22c05e7 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -1159,45 +1159,17 @@ static int mtdchar_mmap(struct file *file, struct vm_area_struct *vma) struct mtd_file_info *mfi = file->private_data; struct mtd_info *mtd = mfi->mtd; struct map_info *map = mtd->priv; - resource_size_t start, off; - unsigned long len, vma_len; /* This is broken because it assumes the MTD device is map-based and that mtd->priv is a valid struct map_info. It should be replaced with something that uses the mtd_get_unmapped_area() operation properly. */ if (0 /*mtd->type == MTD_RAM || mtd->type == MTD_ROM*/) { - off = get_vm_offset(vma); - start = map->phys; - len = PAGE_ALIGN((start & ~PAGE_MASK) + map->size); - start &= PAGE_MASK; - vma_len = get_vm_size(vma); - - /* Overflow in off+len? */ - if (vma_len + off < off) - return -EINVAL; - /* Does it fit in the mapping? */ - if (vma_len + off > len) - return -EINVAL; - - off += start; - /* Did that overflow? */ - if (off < start) - return -EINVAL; - if (set_vm_offset(vma, off) < 0) - return -EINVAL; - vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; - #ifdef pgprot_noncached - if (file->f_flags & O_DSYNC || off >= __pa(high_memory)) + if (file->f_flags & O_DSYNC || map->phys >= __pa(high_memory)) vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); #endif - if (io_remap_pfn_range(vma, vma->vm_start, off >> PAGE_SHIFT, - vma->vm_end - vma->vm_start, - vma->vm_page_prot)) - return -EAGAIN; - - return 0; + return vm_iomap_memory(vma, map->phys, map->size); } return -ENOSYS; #else From fc9bbca8f650e5f738af8806317c0a041a48ae4a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 19 Apr 2013 09:57:35 -0700 Subject: [PATCH 280/303] vm: convert fb_mmap to vm_iomap_memory() helper This is my example conversion of a few existing mmap users. The fb_mmap() case is a good example because it is a bit more complicated than some: fb_mmap() mmaps one of two different memory areas depending on the page offset of the mmap (but happily there is never any mixing of the two, so the helper function still works). Signed-off-by: Linus Torvalds --- drivers/video/fbmem.c | 39 ++++++++++++++------------------------- 1 file changed, 14 insertions(+), 25 deletions(-) diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c index 7c254084b6a0..86291dcd964a 100644 --- a/drivers/video/fbmem.c +++ b/drivers/video/fbmem.c @@ -1373,15 +1373,12 @@ fb_mmap(struct file *file, struct vm_area_struct * vma) { struct fb_info *info = file_fb_info(file); struct fb_ops *fb; - unsigned long off; + unsigned long mmio_pgoff; unsigned long start; u32 len; if (!info) return -ENODEV; - if (vma->vm_pgoff > (~0UL >> PAGE_SHIFT)) - return -EINVAL; - off = vma->vm_pgoff << PAGE_SHIFT; fb = info->fbops; if (!fb) return -ENODEV; @@ -1393,32 +1390,24 @@ fb_mmap(struct file *file, struct vm_area_struct * vma) return res; } - /* frame buffer memory */ + /* + * Ugh. This can be either the frame buffer mapping, or + * if pgoff points past it, the mmio mapping. + */ start = info->fix.smem_start; - len = PAGE_ALIGN((start & ~PAGE_MASK) + info->fix.smem_len); - if (off >= len) { - /* memory mapped io */ - off -= len; - if (info->var.accel_flags) { - mutex_unlock(&info->mm_lock); - return -EINVAL; - } + len = info->fix.smem_len; + mmio_pgoff = PAGE_ALIGN((start & ~PAGE_MASK) + len) >> PAGE_SHIFT; + if (vma->vm_pgoff >= mmio_pgoff) { + vma->vm_pgoff -= mmio_pgoff; start = info->fix.mmio_start; - len = PAGE_ALIGN((start & ~PAGE_MASK) + info->fix.mmio_len); + len = info->fix.mmio_len; } mutex_unlock(&info->mm_lock); - start &= PAGE_MASK; - if ((vma->vm_end - vma->vm_start + off) > len) - return -EINVAL; - off += start; - vma->vm_pgoff = off >> PAGE_SHIFT; - /* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by io_remap_pfn_range()*/ + vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); - fb_pgprotect(file, vma, off); - if (io_remap_pfn_range(vma, vma->vm_start, off >> PAGE_SHIFT, - vma->vm_end - vma->vm_start, vma->vm_page_prot)) - return -EAGAIN; - return 0; + fb_pgprotect(file, vma, start); + + return vm_iomap_memory(vma, start, len); } static int From 0fe09a45c4848b5b5607b968d959fdc1821c161d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 19 Apr 2013 10:01:04 -0700 Subject: [PATCH 281/303] vm: convert snd_pcm_lib_mmap_iomem() to vm_iomap_memory() helper This is my example conversion of a few existing mmap users. The pcm mmap case is one of the more straightforward ones. Acked-by: Takashi Iwai Signed-off-by: Linus Torvalds --- sound/core/pcm_native.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 71ae86ca64ac..eb560fa32321 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -3222,18 +3222,10 @@ EXPORT_SYMBOL_GPL(snd_pcm_lib_default_mmap); int snd_pcm_lib_mmap_iomem(struct snd_pcm_substream *substream, struct vm_area_struct *area) { - long size; - unsigned long offset; + struct snd_pcm_runtime *runtime = substream->runtime;; area->vm_page_prot = pgprot_noncached(area->vm_page_prot); - area->vm_flags |= VM_IO; - size = area->vm_end - area->vm_start; - offset = area->vm_pgoff << PAGE_SHIFT; - if (io_remap_pfn_range(area, area->vm_start, - (substream->runtime->dma_addr + offset) >> PAGE_SHIFT, - size, area->vm_page_prot)) - return -EAGAIN; - return 0; + return vm_iomap_memory(area, runtime->dma_addr, runtime->dma_bytes); } EXPORT_SYMBOL(snd_pcm_lib_mmap_iomem); From f86b11fbc772b5a4926855c6bc1ff3a3d99995a7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 19 Apr 2013 10:05:39 -0700 Subject: [PATCH 282/303] mtdchar: remove no-longer-used vma helpers With the conversion to vm_iomap_memory(), these vma helpers are no longer used. Signed-off-by: Linus Torvalds --- drivers/mtd/mtdchar.c | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index 61a1b22c05e7..dc571ebc1aa0 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -1123,33 +1123,6 @@ static unsigned long mtdchar_get_unmapped_area(struct file *file, } #endif -static inline unsigned long get_vm_size(struct vm_area_struct *vma) -{ - return vma->vm_end - vma->vm_start; -} - -static inline resource_size_t get_vm_offset(struct vm_area_struct *vma) -{ - return (resource_size_t) vma->vm_pgoff << PAGE_SHIFT; -} - -/* - * Set a new vm offset. - * - * Verify that the incoming offset really works as a page offset, - * and that the offset and size fit in a resource_size_t. - */ -static inline int set_vm_offset(struct vm_area_struct *vma, resource_size_t off) -{ - pgoff_t pgoff = off >> PAGE_SHIFT; - if (off != (resource_size_t) pgoff << PAGE_SHIFT) - return -EINVAL; - if (off + get_vm_size(vma) - 1 < off) - return -EINVAL; - vma->vm_pgoff = pgoff; - return 0; -} - /* * set up a mapping for shared memory segments */ From 12fb3dd9dc3c64ba7d64cec977cca9b5fb7b1d4e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 Apr 2013 07:19:48 +0000 Subject: [PATCH 283/303] tcp: call tcp_replace_ts_recent() from tcp_ack() commit bd090dfc634d (tcp: tcp_replace_ts_recent() should not be called from tcp_validate_incoming()) introduced a TS ecr bug in slow path processing. 1 A > B P. 1:10001(10000) ack 1 2 B < A . 1:1(0) ack 1 win 257 3 A > B . 1:1001(1000) ack 1 win 227 4 A > B . 1001:2001(1000) ack 1 win 227 (ecr 200 should be ecr 300 in packets 3 & 4) Problem is tcp_ack() can trigger send of new packets (retransmits), reflecting the prior TSval, instead of the TSval contained in the currently processed incoming packet. Fix this by calling tcp_replace_ts_recent() from tcp_ack() after the checks, but before the actions. Reported-by: Yuchung Cheng Signed-off-by: Eric Dumazet Cc: Neal Cardwell Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 64 +++++++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 33 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3bd55bad230a..13b9c08fc158 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -113,6 +113,7 @@ int sysctl_tcp_early_retrans __read_mostly = 2; #define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ #define FLAG_NONHEAD_RETRANS_ACKED 0x1000 /* Non-head rexmitted data was ACKed */ #define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ +#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */ #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) @@ -3564,6 +3565,27 @@ static void tcp_send_challenge_ack(struct sock *sk) } } +static void tcp_store_ts_recent(struct tcp_sock *tp) +{ + tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval; + tp->rx_opt.ts_recent_stamp = get_seconds(); +} + +static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) +{ + if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) { + /* PAWS bug workaround wrt. ACK frames, the PAWS discard + * extra check below makes sure this can only happen + * for pure ACK frames. -DaveM + * + * Not only, also it occurs for expired timestamps. + */ + + if (tcp_paws_check(&tp->rx_opt, 0)) + tcp_store_ts_recent(tp); + } +} + /* This routine deals with incoming acks, but not outgoing ones. */ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) { @@ -3607,6 +3629,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) prior_fackets = tp->fackets_out; prior_in_flight = tcp_packets_in_flight(tp); + /* ts_recent update must be made after we are sure that the packet + * is in window. + */ + if (flag & FLAG_UPDATE_TS_RECENT) + tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); + if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) { /* Window is constant, pure forward advance. * No more checks are required. @@ -3927,27 +3955,6 @@ const u8 *tcp_parse_md5sig_option(const struct tcphdr *th) EXPORT_SYMBOL(tcp_parse_md5sig_option); #endif -static inline void tcp_store_ts_recent(struct tcp_sock *tp) -{ - tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval; - tp->rx_opt.ts_recent_stamp = get_seconds(); -} - -static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) -{ - if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) { - /* PAWS bug workaround wrt. ACK frames, the PAWS discard - * extra check below makes sure this can only happen - * for pure ACK frames. -DaveM - * - * Not only, also it occurs for expired timestamps. - */ - - if (tcp_paws_check(&tp->rx_opt, 0)) - tcp_store_ts_recent(tp); - } -} - /* Sorry, PAWS as specified is broken wrt. pure-ACKs -DaveM * * It is not fatal. If this ACK does _not_ change critical state (seqs, window) @@ -5543,14 +5550,9 @@ slow_path: return 0; step5: - if (tcp_ack(sk, skb, FLAG_SLOWPATH) < 0) + if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0) goto discard; - /* ts_recent update must be made after we are sure that the packet - * is in window. - */ - tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); - tcp_rcv_rtt_measure_ts(sk, skb); /* Process urgent data. */ @@ -5986,7 +5988,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, /* step 5: check the ACK field */ if (true) { - int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH) > 0; + int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH | + FLAG_UPDATE_TS_RECENT) > 0; switch (sk->sk_state) { case TCP_SYN_RECV: @@ -6137,11 +6140,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, } } - /* ts_recent update must be made after we are sure that the packet - * is in window. - */ - tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); - /* step 6: check the URG bit */ tcp_urg(sk, skb, th); From cea15092f098b7018e89f64a5a14bb71955965d5 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 18 Apr 2013 17:33:40 +0100 Subject: [PATCH 284/303] ARM: 7699/1: sched_clock: Add more notrace to prevent recursion cyc_to_sched_clock() is called by sched_clock() and cyc_to_ns() is called by cyc_to_sched_clock(). I suspect that some compilers inline both of these functions into sched_clock() and so we've been getting away without having a notrace marking. It seems that my compiler isn't inlining cyc_to_sched_clock() though, so I'm hitting a recursion bug when I enable the function graph tracer, causing my system to crash. Marking these functions notrace fixes it. Technically cyc_to_ns() doesn't need the notrace because it's already marked inline, but let's just add it so that if we ever remove inline from that function it doesn't blow up. Signed-off-by: Stephen Boyd Signed-off-by: Russell King --- arch/arm/kernel/sched_clock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c index bd6f56b9ec21..59d2adb764a9 100644 --- a/arch/arm/kernel/sched_clock.c +++ b/arch/arm/kernel/sched_clock.c @@ -45,12 +45,12 @@ static u32 notrace jiffy_sched_clock_read(void) static u32 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read; -static inline u64 cyc_to_ns(u64 cyc, u32 mult, u32 shift) +static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift) { return (cyc * mult) >> shift; } -static unsigned long long cyc_to_sched_clock(u32 cyc, u32 mask) +static unsigned long long notrace cyc_to_sched_clock(u32 cyc, u32 mask) { u64 epoch_ns; u32 epoch_cyc; From f36391d2790d04993f48da6a45810033a2cdf847 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 19 Apr 2013 17:26:26 -0400 Subject: [PATCH 285/303] sparc64: Fix race in TLB batch processing. As reported by Dave Kleikamp, when we emit cross calls to do batched TLB flush processing we have a race because we do not synchronize on the sibling cpus completing the cross call. So meanwhile the TLB batch can be reset (tb->tlb_nr set to zero, etc.) and either flushes are missed or flushes will flush the wrong addresses. Fix this by using generic infrastructure to synchonize on the completion of the cross call. This first required getting the flush_tlb_pending() call out from switch_to() which operates with locks held and interrupts disabled. The problem is that smp_call_function_many() cannot be invoked with IRQs disabled and this is explicitly checked for with WARN_ON_ONCE(). We get the batch processing outside of locked IRQ disabled sections by using some ideas from the powerpc port. Namely, we only batch inside of arch_{enter,leave}_lazy_mmu_mode() calls. If we're not in such a region, we flush TLBs synchronously. 1) Get rid of xcall_flush_tlb_pending and per-cpu type implementations. 2) Do TLB batch cross calls instead via: smp_call_function_many() tlb_pending_func() __flush_tlb_pending() 3) Batch only in lazy mmu sequences: a) Add 'active' member to struct tlb_batch b) Define __HAVE_ARCH_ENTER_LAZY_MMU_MODE c) Set 'active' in arch_enter_lazy_mmu_mode() d) Run batch and clear 'active' in arch_leave_lazy_mmu_mode() e) Check 'active' in tlb_batch_add_one() and do a synchronous flush if it's clear. 4) Add infrastructure for synchronous TLB page flushes. a) Implement __flush_tlb_page and per-cpu variants, patch as needed. b) Likewise for xcall_flush_tlb_page. c) Implement smp_flush_tlb_page() to invoke the cross-call. d) Wire up global_flush_tlb_page() to the right routine based upon CONFIG_SMP 5) It turns out that singleton batches are very common, 2 out of every 3 batch flushes have only a single entry in them. The batch flush waiting is very expensive, both because of the poll on sibling cpu completeion, as well as because passing the tlb batch pointer to the sibling cpus invokes a shared memory dereference. Therefore, in flush_tlb_pending(), if there is only one entry in the batch perform a completely asynchronous global_flush_tlb_page() instead. Reported-by: Dave Kleikamp Signed-off-by: David S. Miller Acked-by: Dave Kleikamp --- arch/sparc/include/asm/pgtable_64.h | 1 + arch/sparc/include/asm/switch_to_64.h | 3 +- arch/sparc/include/asm/tlbflush_64.h | 37 ++++++-- arch/sparc/kernel/smp_64.c | 43 ++++++++-- arch/sparc/mm/tlb.c | 38 +++++++- arch/sparc/mm/tsb.c | 57 ++++++++---- arch/sparc/mm/ultra.S | 119 ++++++++++++++++++++------ 7 files changed, 242 insertions(+), 56 deletions(-) diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 08fcce90316b..7619f2f792af 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -915,6 +915,7 @@ static inline int io_remap_pfn_range(struct vm_area_struct *vma, return remap_pfn_range(vma, from, phys_base >> PAGE_SHIFT, size, prot); } +#include #include /* We provide our own get_unmapped_area to cope with VA holes and diff --git a/arch/sparc/include/asm/switch_to_64.h b/arch/sparc/include/asm/switch_to_64.h index cad36f56fa03..c7de3323819c 100644 --- a/arch/sparc/include/asm/switch_to_64.h +++ b/arch/sparc/include/asm/switch_to_64.h @@ -18,8 +18,7 @@ do { \ * and 2 stores in this critical code path. -DaveM */ #define switch_to(prev, next, last) \ -do { flush_tlb_pending(); \ - save_and_clear_fpu(); \ +do { save_and_clear_fpu(); \ /* If you are tempted to conditionalize the following */ \ /* so that ASI is only written if it changes, think again. */ \ __asm__ __volatile__("wr %%g0, %0, %%asi" \ diff --git a/arch/sparc/include/asm/tlbflush_64.h b/arch/sparc/include/asm/tlbflush_64.h index 2ef463494153..f0d6a9700f4c 100644 --- a/arch/sparc/include/asm/tlbflush_64.h +++ b/arch/sparc/include/asm/tlbflush_64.h @@ -11,24 +11,40 @@ struct tlb_batch { struct mm_struct *mm; unsigned long tlb_nr; + unsigned long active; unsigned long vaddrs[TLB_BATCH_NR]; }; extern void flush_tsb_kernel_range(unsigned long start, unsigned long end); extern void flush_tsb_user(struct tlb_batch *tb); +extern void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr); /* TLB flush operations. */ -extern void flush_tlb_pending(void); +static inline void flush_tlb_mm(struct mm_struct *mm) +{ +} -#define flush_tlb_range(vma,start,end) \ - do { (void)(start); flush_tlb_pending(); } while (0) -#define flush_tlb_page(vma,addr) flush_tlb_pending() -#define flush_tlb_mm(mm) flush_tlb_pending() +static inline void flush_tlb_page(struct vm_area_struct *vma, + unsigned long vmaddr) +{ +} + +static inline void flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ +} + +#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE + +extern void flush_tlb_pending(void); +extern void arch_enter_lazy_mmu_mode(void); +extern void arch_leave_lazy_mmu_mode(void); +#define arch_flush_lazy_mmu_mode() do {} while (0) /* Local cpu only. */ extern void __flush_tlb_all(void); - +extern void __flush_tlb_page(unsigned long context, unsigned long vaddr); extern void __flush_tlb_kernel_range(unsigned long start, unsigned long end); #ifndef CONFIG_SMP @@ -38,15 +54,24 @@ do { flush_tsb_kernel_range(start,end); \ __flush_tlb_kernel_range(start,end); \ } while (0) +static inline void global_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr) +{ + __flush_tlb_page(CTX_HWBITS(mm->context), vaddr); +} + #else /* CONFIG_SMP */ extern void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end); +extern void smp_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr); #define flush_tlb_kernel_range(start, end) \ do { flush_tsb_kernel_range(start,end); \ smp_flush_tlb_kernel_range(start, end); \ } while (0) +#define global_flush_tlb_page(mm, vaddr) \ + smp_flush_tlb_page(mm, vaddr) + #endif /* ! CONFIG_SMP */ #endif /* _SPARC64_TLBFLUSH_H */ diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 537eb66abd06..ca64d2a86ec0 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -849,7 +849,7 @@ void smp_tsb_sync(struct mm_struct *mm) } extern unsigned long xcall_flush_tlb_mm; -extern unsigned long xcall_flush_tlb_pending; +extern unsigned long xcall_flush_tlb_page; extern unsigned long xcall_flush_tlb_kernel_range; extern unsigned long xcall_fetch_glob_regs; extern unsigned long xcall_fetch_glob_pmu; @@ -1074,19 +1074,52 @@ local_flush_and_out: put_cpu(); } +struct tlb_pending_info { + unsigned long ctx; + unsigned long nr; + unsigned long *vaddrs; +}; + +static void tlb_pending_func(void *info) +{ + struct tlb_pending_info *t = info; + + __flush_tlb_pending(t->ctx, t->nr, t->vaddrs); +} + void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long *vaddrs) { u32 ctx = CTX_HWBITS(mm->context); + struct tlb_pending_info info; + int cpu = get_cpu(); + + info.ctx = ctx; + info.nr = nr; + info.vaddrs = vaddrs; + + if (mm == current->mm && atomic_read(&mm->mm_users) == 1) + cpumask_copy(mm_cpumask(mm), cpumask_of(cpu)); + else + smp_call_function_many(mm_cpumask(mm), tlb_pending_func, + &info, 1); + + __flush_tlb_pending(ctx, nr, vaddrs); + + put_cpu(); +} + +void smp_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr) +{ + unsigned long context = CTX_HWBITS(mm->context); int cpu = get_cpu(); if (mm == current->mm && atomic_read(&mm->mm_users) == 1) cpumask_copy(mm_cpumask(mm), cpumask_of(cpu)); else - smp_cross_call_masked(&xcall_flush_tlb_pending, - ctx, nr, (unsigned long) vaddrs, + smp_cross_call_masked(&xcall_flush_tlb_page, + context, vaddr, 0, mm_cpumask(mm)); - - __flush_tlb_pending(ctx, nr, vaddrs); + __flush_tlb_page(context, vaddr); put_cpu(); } diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index ba6ae7ffdc2c..272aa4f7657e 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -24,11 +24,17 @@ static DEFINE_PER_CPU(struct tlb_batch, tlb_batch); void flush_tlb_pending(void) { struct tlb_batch *tb = &get_cpu_var(tlb_batch); + struct mm_struct *mm = tb->mm; - if (tb->tlb_nr) { - flush_tsb_user(tb); + if (!tb->tlb_nr) + goto out; - if (CTX_VALID(tb->mm->context)) { + flush_tsb_user(tb); + + if (CTX_VALID(mm->context)) { + if (tb->tlb_nr == 1) { + global_flush_tlb_page(mm, tb->vaddrs[0]); + } else { #ifdef CONFIG_SMP smp_flush_tlb_pending(tb->mm, tb->tlb_nr, &tb->vaddrs[0]); @@ -37,12 +43,30 @@ void flush_tlb_pending(void) tb->tlb_nr, &tb->vaddrs[0]); #endif } - tb->tlb_nr = 0; } + tb->tlb_nr = 0; + +out: put_cpu_var(tlb_batch); } +void arch_enter_lazy_mmu_mode(void) +{ + struct tlb_batch *tb = &__get_cpu_var(tlb_batch); + + tb->active = 1; +} + +void arch_leave_lazy_mmu_mode(void) +{ + struct tlb_batch *tb = &__get_cpu_var(tlb_batch); + + if (tb->tlb_nr) + flush_tlb_pending(); + tb->active = 0; +} + static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr, bool exec) { @@ -60,6 +84,12 @@ static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr, nr = 0; } + if (!tb->active) { + global_flush_tlb_page(mm, vaddr); + flush_tsb_user_page(mm, vaddr); + return; + } + if (nr == 0) tb->mm = mm; diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c index 428982b9becf..2cc3bce5ee91 100644 --- a/arch/sparc/mm/tsb.c +++ b/arch/sparc/mm/tsb.c @@ -7,11 +7,10 @@ #include #include #include -#include -#include -#include #include +#include #include +#include #include extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES]; @@ -46,23 +45,27 @@ void flush_tsb_kernel_range(unsigned long start, unsigned long end) } } +static void __flush_tsb_one_entry(unsigned long tsb, unsigned long v, + unsigned long hash_shift, + unsigned long nentries) +{ + unsigned long tag, ent, hash; + + v &= ~0x1UL; + hash = tsb_hash(v, hash_shift, nentries); + ent = tsb + (hash * sizeof(struct tsb)); + tag = (v >> 22UL); + + tsb_flush(ent, tag); +} + static void __flush_tsb_one(struct tlb_batch *tb, unsigned long hash_shift, unsigned long tsb, unsigned long nentries) { unsigned long i; - for (i = 0; i < tb->tlb_nr; i++) { - unsigned long v = tb->vaddrs[i]; - unsigned long tag, ent, hash; - - v &= ~0x1UL; - - hash = tsb_hash(v, hash_shift, nentries); - ent = tsb + (hash * sizeof(struct tsb)); - tag = (v >> 22UL); - - tsb_flush(ent, tag); - } + for (i = 0; i < tb->tlb_nr; i++) + __flush_tsb_one_entry(tsb, tb->vaddrs[i], hash_shift, nentries); } void flush_tsb_user(struct tlb_batch *tb) @@ -90,6 +93,30 @@ void flush_tsb_user(struct tlb_batch *tb) spin_unlock_irqrestore(&mm->context.lock, flags); } +void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr) +{ + unsigned long nentries, base, flags; + + spin_lock_irqsave(&mm->context.lock, flags); + + base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb; + nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries; + if (tlb_type == cheetah_plus || tlb_type == hypervisor) + base = __pa(base); + __flush_tsb_one_entry(base, vaddr, PAGE_SHIFT, nentries); + +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) + if (mm->context.tsb_block[MM_TSB_HUGE].tsb) { + base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb; + nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries; + if (tlb_type == cheetah_plus || tlb_type == hypervisor) + base = __pa(base); + __flush_tsb_one_entry(base, vaddr, HPAGE_SHIFT, nentries); + } +#endif + spin_unlock_irqrestore(&mm->context.lock, flags); +} + #define HV_PGSZ_IDX_BASE HV_PGSZ_IDX_8K #define HV_PGSZ_MASK_BASE HV_PGSZ_MASK_8K diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S index f8e13d421fcb..432aa0cb1b38 100644 --- a/arch/sparc/mm/ultra.S +++ b/arch/sparc/mm/ultra.S @@ -52,6 +52,33 @@ __flush_tlb_mm: /* 18 insns */ nop nop + .align 32 + .globl __flush_tlb_page +__flush_tlb_page: /* 22 insns */ + /* %o0 = context, %o1 = vaddr */ + rdpr %pstate, %g7 + andn %g7, PSTATE_IE, %g2 + wrpr %g2, %pstate + mov SECONDARY_CONTEXT, %o4 + ldxa [%o4] ASI_DMMU, %g2 + stxa %o0, [%o4] ASI_DMMU + andcc %o1, 1, %g0 + andn %o1, 1, %o3 + be,pn %icc, 1f + or %o3, 0x10, %o3 + stxa %g0, [%o3] ASI_IMMU_DEMAP +1: stxa %g0, [%o3] ASI_DMMU_DEMAP + membar #Sync + stxa %g2, [%o4] ASI_DMMU + sethi %hi(KERNBASE), %o4 + flush %o4 + retl + wrpr %g7, 0x0, %pstate + nop + nop + nop + nop + .align 32 .globl __flush_tlb_pending __flush_tlb_pending: /* 26 insns */ @@ -203,6 +230,31 @@ __cheetah_flush_tlb_mm: /* 19 insns */ retl wrpr %g7, 0x0, %pstate +__cheetah_flush_tlb_page: /* 22 insns */ + /* %o0 = context, %o1 = vaddr */ + rdpr %pstate, %g7 + andn %g7, PSTATE_IE, %g2 + wrpr %g2, 0x0, %pstate + wrpr %g0, 1, %tl + mov PRIMARY_CONTEXT, %o4 + ldxa [%o4] ASI_DMMU, %g2 + srlx %g2, CTX_PGSZ1_NUC_SHIFT, %o3 + sllx %o3, CTX_PGSZ1_NUC_SHIFT, %o3 + or %o0, %o3, %o0 /* Preserve nucleus page size fields */ + stxa %o0, [%o4] ASI_DMMU + andcc %o1, 1, %g0 + be,pn %icc, 1f + andn %o1, 1, %o3 + stxa %g0, [%o3] ASI_IMMU_DEMAP +1: stxa %g0, [%o3] ASI_DMMU_DEMAP + membar #Sync + stxa %g2, [%o4] ASI_DMMU + sethi %hi(KERNBASE), %o4 + flush %o4 + wrpr %g0, 0, %tl + retl + wrpr %g7, 0x0, %pstate + __cheetah_flush_tlb_pending: /* 27 insns */ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ rdpr %pstate, %g7 @@ -269,6 +321,20 @@ __hypervisor_flush_tlb_mm: /* 10 insns */ retl nop +__hypervisor_flush_tlb_page: /* 11 insns */ + /* %o0 = context, %o1 = vaddr */ + mov %o0, %g2 + mov %o1, %o0 /* ARG0: vaddr + IMMU-bit */ + mov %g2, %o1 /* ARG1: mmu context */ + mov HV_MMU_ALL, %o2 /* ARG2: flags */ + srlx %o0, PAGE_SHIFT, %o0 + sllx %o0, PAGE_SHIFT, %o0 + ta HV_MMU_UNMAP_ADDR_TRAP + brnz,pn %o0, __hypervisor_tlb_tl0_error + mov HV_MMU_UNMAP_ADDR_TRAP, %o1 + retl + nop + __hypervisor_flush_tlb_pending: /* 16 insns */ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ sllx %o1, 3, %g1 @@ -339,6 +405,13 @@ cheetah_patch_cachetlbops: call tlb_patch_one mov 19, %o2 + sethi %hi(__flush_tlb_page), %o0 + or %o0, %lo(__flush_tlb_page), %o0 + sethi %hi(__cheetah_flush_tlb_page), %o1 + or %o1, %lo(__cheetah_flush_tlb_page), %o1 + call tlb_patch_one + mov 22, %o2 + sethi %hi(__flush_tlb_pending), %o0 or %o0, %lo(__flush_tlb_pending), %o0 sethi %hi(__cheetah_flush_tlb_pending), %o1 @@ -397,10 +470,9 @@ xcall_flush_tlb_mm: /* 21 insns */ nop nop - .globl xcall_flush_tlb_pending -xcall_flush_tlb_pending: /* 21 insns */ - /* %g5=context, %g1=nr, %g7=vaddrs[] */ - sllx %g1, 3, %g1 + .globl xcall_flush_tlb_page +xcall_flush_tlb_page: /* 17 insns */ + /* %g5=context, %g1=vaddr */ mov PRIMARY_CONTEXT, %g4 ldxa [%g4] ASI_DMMU, %g2 srlx %g2, CTX_PGSZ1_NUC_SHIFT, %g4 @@ -408,20 +480,16 @@ xcall_flush_tlb_pending: /* 21 insns */ or %g5, %g4, %g5 mov PRIMARY_CONTEXT, %g4 stxa %g5, [%g4] ASI_DMMU -1: sub %g1, (1 << 3), %g1 - ldx [%g7 + %g1], %g5 - andcc %g5, 0x1, %g0 + andcc %g1, 0x1, %g0 be,pn %icc, 2f - - andn %g5, 0x1, %g5 + andn %g1, 0x1, %g5 stxa %g0, [%g5] ASI_IMMU_DEMAP 2: stxa %g0, [%g5] ASI_DMMU_DEMAP membar #Sync - brnz,pt %g1, 1b - nop stxa %g2, [%g4] ASI_DMMU retry nop + nop .globl xcall_flush_tlb_kernel_range xcall_flush_tlb_kernel_range: /* 25 insns */ @@ -656,15 +724,13 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */ membar #Sync retry - .globl __hypervisor_xcall_flush_tlb_pending -__hypervisor_xcall_flush_tlb_pending: /* 21 insns */ - /* %g5=ctx, %g1=nr, %g7=vaddrs[], %g2,%g3,%g4,g6=scratch */ - sllx %g1, 3, %g1 + .globl __hypervisor_xcall_flush_tlb_page +__hypervisor_xcall_flush_tlb_page: /* 17 insns */ + /* %g5=ctx, %g1=vaddr */ mov %o0, %g2 mov %o1, %g3 mov %o2, %g4 -1: sub %g1, (1 << 3), %g1 - ldx [%g7 + %g1], %o0 /* ARG0: virtual address */ + mov %g1, %o0 /* ARG0: virtual address */ mov %g5, %o1 /* ARG1: mmu context */ mov HV_MMU_ALL, %o2 /* ARG2: flags */ srlx %o0, PAGE_SHIFT, %o0 @@ -673,8 +739,6 @@ __hypervisor_xcall_flush_tlb_pending: /* 21 insns */ mov HV_MMU_UNMAP_ADDR_TRAP, %g6 brnz,a,pn %o0, __hypervisor_tlb_xcall_error mov %o0, %g5 - brnz,pt %g1, 1b - nop mov %g2, %o0 mov %g3, %o1 mov %g4, %o2 @@ -757,6 +821,13 @@ hypervisor_patch_cachetlbops: call tlb_patch_one mov 10, %o2 + sethi %hi(__flush_tlb_page), %o0 + or %o0, %lo(__flush_tlb_page), %o0 + sethi %hi(__hypervisor_flush_tlb_page), %o1 + or %o1, %lo(__hypervisor_flush_tlb_page), %o1 + call tlb_patch_one + mov 11, %o2 + sethi %hi(__flush_tlb_pending), %o0 or %o0, %lo(__flush_tlb_pending), %o0 sethi %hi(__hypervisor_flush_tlb_pending), %o1 @@ -788,12 +859,12 @@ hypervisor_patch_cachetlbops: call tlb_patch_one mov 21, %o2 - sethi %hi(xcall_flush_tlb_pending), %o0 - or %o0, %lo(xcall_flush_tlb_pending), %o0 - sethi %hi(__hypervisor_xcall_flush_tlb_pending), %o1 - or %o1, %lo(__hypervisor_xcall_flush_tlb_pending), %o1 + sethi %hi(xcall_flush_tlb_page), %o0 + or %o0, %lo(xcall_flush_tlb_page), %o0 + sethi %hi(__hypervisor_xcall_flush_tlb_page), %o1 + or %o1, %lo(__hypervisor_xcall_flush_tlb_page), %o1 call tlb_patch_one - mov 21, %o2 + mov 17, %o2 sethi %hi(xcall_flush_tlb_kernel_range), %o0 or %o0, %lo(xcall_flush_tlb_kernel_range), %o0 From e15465e1808542743627f13d1c0cbb7eacc82b83 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 16 Apr 2013 21:10:38 +0000 Subject: [PATCH 286/303] irda: small read past the end of array in debug code The "reason" can come from skb->data[] and it hasn't been capped so it can be from 0-255 instead of just 0-6. For example in irlmp_state_dtr() the code does: reason = skb->data[3]; ... irlmp_disconnect_indication(self, reason, skb); Also LMREASON has a couple other values which don't have entries in the irlmp_reasons[] array. And 0xff is a valid reason as well which means "unknown". So far as I can see we don't actually care about "reason" except for in the debug code. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- include/net/irda/irlmp.h | 3 ++- net/irda/iriap.c | 3 ++- net/irda/irlmp.c | 10 +++++++++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/include/net/irda/irlmp.h b/include/net/irda/irlmp.h index f74109144d3f..f132924cc9da 100644 --- a/include/net/irda/irlmp.h +++ b/include/net/irda/irlmp.h @@ -256,7 +256,8 @@ static inline __u32 irlmp_get_daddr(const struct lsap_cb *self) return (self && self->lap) ? self->lap->daddr : 0; } -extern const char *irlmp_reasons[]; +const char *irlmp_reason_str(LM_REASON reason); + extern int sysctl_discovery_timeout; extern int sysctl_discovery_slots; extern int sysctl_discovery; diff --git a/net/irda/iriap.c b/net/irda/iriap.c index 29340a9a6fb9..e1b37f5a2691 100644 --- a/net/irda/iriap.c +++ b/net/irda/iriap.c @@ -303,7 +303,8 @@ static void iriap_disconnect_indication(void *instance, void *sap, { struct iriap_cb *self; - IRDA_DEBUG(4, "%s(), reason=%s\n", __func__, irlmp_reasons[reason]); + IRDA_DEBUG(4, "%s(), reason=%s [%d]\n", __func__, + irlmp_reason_str(reason), reason); self = instance; diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c index 6115a44c0a24..1064621da6f6 100644 --- a/net/irda/irlmp.c +++ b/net/irda/irlmp.c @@ -66,8 +66,15 @@ const char *irlmp_reasons[] = { "LM_LAP_RESET", "LM_INIT_DISCONNECT", "ERROR, NOT USED", + "UNKNOWN", }; +const char *irlmp_reason_str(LM_REASON reason) +{ + reason = min_t(size_t, reason, ARRAY_SIZE(irlmp_reasons) - 1); + return irlmp_reasons[reason]; +} + /* * Function irlmp_init (void) * @@ -747,7 +754,8 @@ void irlmp_disconnect_indication(struct lsap_cb *self, LM_REASON reason, { struct lsap_cb *lsap; - IRDA_DEBUG(1, "%s(), reason=%s\n", __func__, irlmp_reasons[reason]); + IRDA_DEBUG(1, "%s(), reason=%s [%d]\n", __func__, + irlmp_reason_str(reason), reason); IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;); From cb95ec6261a205bde6451b972fbd6e1581608cae Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 17 Apr 2013 16:49:10 +0000 Subject: [PATCH 287/303] pkt_sched: fix error return code in fw_change_attrs() Fix to return -EINVAL when tb[TCA_FW_MASK] is set and head->mask != 0xFFFFFFFF instead of 0 (ifdef CONFIG_NET_CLS_IND and tb[TCA_FW_INDEV]), as done elsewhere in this function. Signed-off-by: Wei Yongjun Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/cls_fw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 1135d8227f9b..9b97172db84a 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -204,7 +204,6 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, if (err < 0) return err; - err = -EINVAL; if (tb[TCA_FW_CLASSID]) { f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]); tcf_bind_filter(tp, &f->res, base); @@ -218,6 +217,7 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, } #endif /* CONFIG_NET_CLS_IND */ + err = -EINVAL; if (tb[TCA_FW_MASK]) { mask = nla_get_u32(tb[TCA_FW_MASK]); if (mask != head->mask) From 25e40305d4f4399bc8ecf9c9b7cf43493bb40bbd Mon Sep 17 00:00:00 2001 From: "nikolay@redhat.com" Date: Thu, 18 Apr 2013 07:33:34 +0000 Subject: [PATCH 288/303] bonding: mc addresses don't get deleted on enslave failure Add bond_mc_list_flush() after err_detach as that's the first error path after the addresses are added. The main issue is the mc addresses' refcount which only gets bumped up. v2: update log message and don't move code unnecessarily Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index dea8ce20fea4..4cecb80df854 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1901,6 +1901,11 @@ err_dest_symlinks: bond_destroy_slave_symlinks(bond_dev, slave_dev); err_detach: + if (!USES_PRIMARY(bond->params.mode)) { + netif_addr_lock_bh(bond_dev); + bond_mc_list_flush(bond_dev, slave_dev); + netif_addr_unlock_bh(bond_dev); + } write_lock_bh(&bond->lock); bond_detach_slave(bond, new_slave); write_unlock_bh(&bond->lock); From a506e7b479e1215c230e4b87fedc246cf748537f Mon Sep 17 00:00:00 2001 From: "nikolay@redhat.com" Date: Thu, 18 Apr 2013 07:33:35 +0000 Subject: [PATCH 289/303] bonding: vlans don't get deleted on enslave failure The main problem is with vid refcount which only gets bumped up. Delete the vlans after err_detach as that's the first error path after the vlans are added. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 4cecb80df854..dd67c49070d7 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1906,6 +1906,7 @@ err_detach: bond_mc_list_flush(bond_dev, slave_dev); netif_addr_unlock_bh(bond_dev); } + bond_del_vlans_from_slave(bond, slave_dev); write_lock_bh(&bond->lock); bond_detach_slave(bond, new_slave); write_unlock_bh(&bond->lock); From 3c5913b53fefc9d9e15a2d0f93042766658d9f3f Mon Sep 17 00:00:00 2001 From: "nikolay@redhat.com" Date: Thu, 18 Apr 2013 07:33:36 +0000 Subject: [PATCH 290/303] bonding: primary_slave & curr_active_slave are not cleaned on enslave failure On enslave failure primary_slave can point to new_slave which is to be freed, and the same applies to curr_active_slave. So check if this is the case and clean up properly after err_detach because that's the first error code path after they're set. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index dd67c49070d7..1137d5eac450 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1909,7 +1909,17 @@ err_detach: bond_del_vlans_from_slave(bond, slave_dev); write_lock_bh(&bond->lock); bond_detach_slave(bond, new_slave); + if (bond->primary_slave == new_slave) + bond->primary_slave = NULL; write_unlock_bh(&bond->lock); + if (bond->curr_active_slave == new_slave) { + read_lock(&bond->lock); + write_lock_bh(&bond->curr_slave_lock); + bond_change_active_slave(bond, NULL); + bond_select_active_slave(bond); + write_unlock_bh(&bond->curr_slave_lock); + read_unlock(&bond->lock); + } err_close: slave_dev->priv_flags &= ~IFF_BONDING; From fc7a72ac86e2956dd405b0c604fea45a2702f567 Mon Sep 17 00:00:00 2001 From: "nikolay@redhat.com" Date: Thu, 18 Apr 2013 07:33:37 +0000 Subject: [PATCH 291/303] bonding: disable netpoll on enslave failure slave_disable_netpoll() is not called upon enslave failure which would lead to a memory leak. Call slave_disable_netpoll() after err_detach as that's the first error path after enabling netpoll on that slave. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 1137d5eac450..ae35b28a39e4 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1920,6 +1920,7 @@ err_detach: write_unlock_bh(&bond->curr_slave_lock); read_unlock(&bond->lock); } + slave_disable_netpoll(new_slave); err_close: slave_dev->priv_flags &= ~IFF_BONDING; From d632ce989c811863a1ce9b1c53dae2cf06b35c49 Mon Sep 17 00:00:00 2001 From: "nikolay@redhat.com" Date: Thu, 18 Apr 2013 07:33:38 +0000 Subject: [PATCH 292/303] bonding: in bond_mc_swap() bond's mc addr list is walked without lock Use netif_addr_lock_bh() to acquire the appropriate lock before walking. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index ae35b28a39e4..dbbea0eec134 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -846,8 +846,10 @@ static void bond_mc_swap(struct bonding *bond, struct slave *new_active, if (bond->dev->flags & IFF_ALLMULTI) dev_set_allmulti(old_active->dev, -1); + netif_addr_lock_bh(bond->dev); netdev_for_each_mc_addr(ha, bond->dev) dev_mc_del(old_active->dev, ha->addr); + netif_addr_unlock_bh(bond->dev); } if (new_active) { @@ -858,8 +860,10 @@ static void bond_mc_swap(struct bonding *bond, struct slave *new_active, if (bond->dev->flags & IFF_ALLMULTI) dev_set_allmulti(new_active->dev, 1); + netif_addr_lock_bh(bond->dev); netdev_for_each_mc_addr(ha, bond->dev) dev_mc_add(new_active->dev, ha->addr); + netif_addr_unlock_bh(bond->dev); } } From 6ff509af3869ccac69dcf8905fc75b9a76951594 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Thu, 18 Apr 2013 12:57:09 +0000 Subject: [PATCH 293/303] net: qmi_wwan: fixup missing ethernet header (firmware bug workaround) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A number of LTE devices from different vendors all suffer from the same firmware bug: Most of the packets received from the device while it is attached to a LTE network will not have an ethernet header. The devices work as expected when attached to 2G or 3G networks, sending an ethernet header with all packets. This driver is not aware of which network the modem attached to, and even if it were there are still some packet types which are always received with the header intact. All devices supported by this driver have severely limited networking capabilities: - can only transmit IPv4, IPv6 and possibly ARP - can only support a single host hardware address at any time - will only do point-to-point communcation with the host Because of this, we are able to reliably identify any bogus raw IP packets by simply looking at the 4 IP version bits. All we need to do is to avoid 4 or 6 in the first digit of the mac address. This workaround ensures this, and fix up the received packets as necessary. Given the distribution of the bug, it is believed that the source is the chipset vendor. The devices which are verified to be affected are: Huawei E392u-12 (Qualcomm MDM9200) Pantech UML290 (Qualcomm MDM9600) Novatel USB551L (Qualcomm MDM9600) Novatel E362 (Qualcomm MDM9600) It is believed that the bug depend on firmware revision, which means that possibly all devices based on the above mentioned chipset may be affected if we consider all available firmware revisions. The information about affected devices and versions is likely incomplete. As the additional overhead for packets not needing this fixup is very small, it is considered acceptable to apply the workaround to all devices handled by this driver. Reported-by: Dan Williams Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 84 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 968d5d50751d..d8a50c781af0 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -52,6 +53,82 @@ struct qmi_wwan_state { struct usb_interface *data; }; +/* Make up an ethernet header if the packet doesn't have one. + * + * A firmware bug common among several devices cause them to send raw + * IP packets under some circumstances. There is no way for the + * driver/host to know when this will happen. And even when the bug + * hits, some packets will still arrive with an intact header. + * + * The supported devices are only capably of sending IPv4, IPv6 and + * ARP packets on a point-to-point link. Any packet with an ethernet + * header will have either our address or a broadcast/multicast + * address as destination. ARP packets will always have a header. + * + * This means that this function will reliably add the appropriate + * header iff necessary, provided our hardware address does not start + * with 4 or 6. + */ +static int qmi_wwan_rx_fixup(struct usbnet *dev, struct sk_buff *skb) +{ + __be16 proto; + + /* usbnet rx_complete guarantees that skb->len is at least + * hard_header_len, so we can inspect the dest address without + * checking skb->len + */ + switch (skb->data[0] & 0xf0) { + case 0x40: + proto = htons(ETH_P_IP); + break; + case 0x60: + proto = htons(ETH_P_IPV6); + break; + default: + /* pass along other packets without modifications */ + return 1; + } + if (skb_headroom(skb) < ETH_HLEN) + return 0; + skb_push(skb, ETH_HLEN); + skb_reset_mac_header(skb); + eth_hdr(skb)->h_proto = proto; + memset(eth_hdr(skb)->h_source, 0, ETH_ALEN); + memcpy(eth_hdr(skb)->h_dest, dev->net->dev_addr, ETH_ALEN); + return 1; +} + +/* very simplistic detection of IPv4 or IPv6 headers */ +static bool possibly_iphdr(const char *data) +{ + return (data[0] & 0xd0) == 0x40; +} + +/* disallow addresses which may be confused with IP headers */ +static int qmi_wwan_mac_addr(struct net_device *dev, void *p) +{ + int ret; + struct sockaddr *addr = p; + + ret = eth_prepare_mac_addr_change(dev, p); + if (ret < 0) + return ret; + if (possibly_iphdr(addr->sa_data)) + return -EADDRNOTAVAIL; + eth_commit_mac_addr_change(dev, p); + return 0; +} + +static const struct net_device_ops qmi_wwan_netdev_ops = { + .ndo_open = usbnet_open, + .ndo_stop = usbnet_stop, + .ndo_start_xmit = usbnet_start_xmit, + .ndo_tx_timeout = usbnet_tx_timeout, + .ndo_change_mtu = usbnet_change_mtu, + .ndo_set_mac_address = qmi_wwan_mac_addr, + .ndo_validate_addr = eth_validate_addr, +}; + /* using a counter to merge subdriver requests with our own into a combined state */ static int qmi_wwan_manage_power(struct usbnet *dev, int on) { @@ -229,6 +306,12 @@ next_desc: usb_driver_release_interface(driver, info->data); } + /* make MAC addr easily distinguishable from an IP header */ + if (possibly_iphdr(dev->net->dev_addr)) { + dev->net->dev_addr[0] |= 0x02; /* set local assignment bit */ + dev->net->dev_addr[0] &= 0xbf; /* clear "IP" bit */ + } + dev->net->netdev_ops = &qmi_wwan_netdev_ops; err: return status; } @@ -307,6 +390,7 @@ static const struct driver_info qmi_wwan_info = { .bind = qmi_wwan_bind, .unbind = qmi_wwan_unbind, .manage_power = qmi_wwan_manage_power, + .rx_fixup = qmi_wwan_rx_fixup, }; #define HUAWEI_VENDOR_ID 0x12D1 From 6483bdc9d76fb98174797516a19d289eb837909e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Thu, 18 Apr 2013 12:57:10 +0000 Subject: [PATCH 294/303] net: qmi_wwan: fixup destination address (firmware bug workaround) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Received packets are sometimes addressed to 00:a0:c6:00:00:00 instead of the address the device firmware should have learned from the host: 321.224126 77.16.85.204 -> 148.122.171.134 ICMP 98 Echo (ping) request id=0x4025, seq=64/16384, ttl=64 0000 82 c0 82 c9 f1 67 82 c0 82 c9 f1 67 08 00 45 00 .....g.....g..E. 0010 00 54 00 00 40 00 40 01 57 cc 4d 10 55 cc 94 7a .T..@.@.W.M.U..z 0020 ab 86 08 00 62 fc 40 25 00 40 b2 bc 6e 51 00 00 ....b.@%.@..nQ.. 0030 00 00 6b bd 09 00 00 00 00 00 10 11 12 13 14 15 ..k............. 0040 16 17 18 19 1a 1b 1c 1d 1e 1f 20 21 22 23 24 25 .......... !"#$% 0050 26 27 28 29 2a 2b 2c 2d 2e 2f 30 31 32 33 34 35 &'()*+,-./012345 0060 36 37 67 321.240607 148.122.171.134 -> 77.16.85.204 ICMP 98 Echo (ping) reply id=0x4025, seq=64/16384, ttl=55 0000 00 a0 c6 00 00 00 02 50 f3 00 00 00 08 00 45 00 .......P......E. 0010 00 54 00 56 00 00 37 01 a0 76 94 7a ab 86 4d 10 .T.V..7..v.z..M. 0020 55 cc 00 00 6a fc 40 25 00 40 b2 bc 6e 51 00 00 U...j.@%.@..nQ.. 0030 00 00 6b bd 09 00 00 00 00 00 10 11 12 13 14 15 ..k............. 0040 16 17 18 19 1a 1b 1c 1d 1e 1f 20 21 22 23 24 25 .......... !"#$% 0050 26 27 28 29 2a 2b 2c 2d 2e 2f 30 31 32 33 34 35 &'()*+,-./012345 0060 36 37 67 The bogus address is always the same, and matches the address suggested by many devices as a default address. It is likely a hardcoded firmware default. The circumstances where this bug has been observed indicates that the trigger is related to timing or some other factor the host cannot control. Repeating the exact same configuration sequence that caused it to trigger once, will not necessarily cause it to trigger the next time. Reproducing the bug is therefore difficult. This opens up a possibility that the bug is more common than we can confirm, because affected devices often will work properly again after a reset. A procedure most users are likely to try out before reporting a bug. Unconditionally rewriting the destination address if the first digit of the received packet is 0, is considered an acceptable compromise since we already have to inspect this digit. The simplification will cause unnecessary rewrites if the real address starts with 0, but this is still better than adding additional tests for this particular case. Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index d8a50c781af0..cff0bbdd9f52 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -68,6 +68,10 @@ struct qmi_wwan_state { * This means that this function will reliably add the appropriate * header iff necessary, provided our hardware address does not start * with 4 or 6. + * + * Another common firmware bug results in all packets being addressed + * to 00:a0:c6:00:00:00 despite the host address being different. + * This function will also fixup such packets. */ static int qmi_wwan_rx_fixup(struct usbnet *dev, struct sk_buff *skb) { @@ -84,6 +88,12 @@ static int qmi_wwan_rx_fixup(struct usbnet *dev, struct sk_buff *skb) case 0x60: proto = htons(ETH_P_IPV6); break; + case 0x00: + if (is_multicast_ether_addr(skb->data)) + return 1; + /* possibly bogus destination - rewrite just in case */ + skb_reset_mac_header(skb); + goto fix_dest; default: /* pass along other packets without modifications */ return 1; @@ -94,6 +104,7 @@ static int qmi_wwan_rx_fixup(struct usbnet *dev, struct sk_buff *skb) skb_reset_mac_header(skb); eth_hdr(skb)->h_proto = proto; memset(eth_hdr(skb)->h_source, 0, ETH_ALEN); +fix_dest: memcpy(eth_hdr(skb)->h_dest, dev->net->dev_addr, ETH_ALEN); return 1; } From cc6ba5fdaabea7a7b28de3ba1e0fe54d92232fe5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Thu, 18 Apr 2013 12:57:11 +0000 Subject: [PATCH 295/303] net: qmi_wwan: prevent duplicate mac address on link (firmware bug workaround) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We normally trust and use the CDC functional descriptors provided by a number of devices. But some of these will erroneously list the address reserved for the device end of the link. Attempting to use this on both the device and host side will naturally not work. Work around this bug by ignoring the functional descriptor and assign a random address instead in this case. Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index cff0bbdd9f52..2a3579f67910 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -53,6 +53,9 @@ struct qmi_wwan_state { struct usb_interface *data; }; +/* default ethernet address used by the modem */ +static const u8 default_modem_addr[ETH_ALEN] = {0x02, 0x50, 0xf3}; + /* Make up an ethernet header if the packet doesn't have one. * * A firmware bug common among several devices cause them to send raw @@ -317,6 +320,12 @@ next_desc: usb_driver_release_interface(driver, info->data); } + /* Never use the same address on both ends of the link, even + * if the buggy firmware told us to. + */ + if (!compare_ether_addr(dev->net->dev_addr, default_modem_addr)) + eth_hw_addr_random(dev->net); + /* make MAC addr easily distinguishable from an IP header */ if (possibly_iphdr(dev->net->dev_addr)) { dev->net->dev_addr[0] |= 0x02; /* set local assignment bit */ From ae721f3ab0d9234219edbc7858cf9c5ceda42df2 Mon Sep 17 00:00:00 2001 From: Sritej Velaga Date: Thu, 18 Apr 2013 19:49:52 +0000 Subject: [PATCH 296/303] qlge: Fix receive path to drop error frames o Fix the driver to drop error frames in the receive path o Update error counter which was not getting incremented Signed-off-by: Sritej Velaga Signed-off-by: Jitendra Kalsaria Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlge/qlge_main.c | 36 ++++++++++++++++---- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c index b13ab544a7eb..8033555e53c2 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c @@ -1434,11 +1434,13 @@ map_error: } /* Categorizing receive firmware frame errors */ -static void ql_categorize_rx_err(struct ql_adapter *qdev, u8 rx_err) +static void ql_categorize_rx_err(struct ql_adapter *qdev, u8 rx_err, + struct rx_ring *rx_ring) { struct nic_stats *stats = &qdev->nic_stats; stats->rx_err_count++; + rx_ring->rx_errors++; switch (rx_err & IB_MAC_IOCB_RSP_ERR_MASK) { case IB_MAC_IOCB_RSP_ERR_CODE_ERR: @@ -1474,6 +1476,12 @@ static void ql_process_mac_rx_gro_page(struct ql_adapter *qdev, struct bq_desc *lbq_desc = ql_get_curr_lchunk(qdev, rx_ring); struct napi_struct *napi = &rx_ring->napi; + /* Frame error, so drop the packet. */ + if (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_ERR_MASK) { + ql_categorize_rx_err(qdev, ib_mac_rsp->flags2, rx_ring); + put_page(lbq_desc->p.pg_chunk.page); + return; + } napi->dev = qdev->ndev; skb = napi_get_frags(napi); @@ -1529,6 +1537,12 @@ static void ql_process_mac_rx_page(struct ql_adapter *qdev, addr = lbq_desc->p.pg_chunk.va; prefetch(addr); + /* Frame error, so drop the packet. */ + if (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_ERR_MASK) { + ql_categorize_rx_err(qdev, ib_mac_rsp->flags2, rx_ring); + goto err_out; + } + /* The max framesize filter on this chip is set higher than * MTU since FCoE uses 2k frames. */ @@ -1614,6 +1628,13 @@ static void ql_process_mac_rx_skb(struct ql_adapter *qdev, memcpy(skb_put(new_skb, length), skb->data, length); skb = new_skb; + /* Frame error, so drop the packet. */ + if (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_ERR_MASK) { + ql_categorize_rx_err(qdev, ib_mac_rsp->flags2, rx_ring); + dev_kfree_skb_any(skb); + return; + } + /* loopback self test for ethtool */ if (test_bit(QL_SELFTEST, &qdev->flags)) { ql_check_lb_frame(qdev, skb); @@ -1919,6 +1940,13 @@ static void ql_process_mac_split_rx_intr(struct ql_adapter *qdev, return; } + /* Frame error, so drop the packet. */ + if (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_ERR_MASK) { + ql_categorize_rx_err(qdev, ib_mac_rsp->flags2, rx_ring); + dev_kfree_skb_any(skb); + return; + } + /* The max framesize filter on this chip is set higher than * MTU since FCoE uses 2k frames. */ @@ -2000,12 +2028,6 @@ static unsigned long ql_process_mac_rx_intr(struct ql_adapter *qdev, QL_DUMP_IB_MAC_RSP(ib_mac_rsp); - /* Frame error, so drop the packet. */ - if (ib_mac_rsp->flags2 & IB_MAC_IOCB_RSP_ERR_MASK) { - ql_categorize_rx_err(qdev, ib_mac_rsp->flags2); - return (unsigned long)length; - } - if (ib_mac_rsp->flags4 & IB_MAC_IOCB_RSP_HV) { /* The data and headers are split into * separate buffers. From c5e991af93e37f1b5e166f0b8a9bb99dd29e8a24 Mon Sep 17 00:00:00 2001 From: Jitendra Kalsaria Date: Thu, 18 Apr 2013 19:49:53 +0000 Subject: [PATCH 297/303] qlge: Fix ethtool autoneg advertising. Autoneg is supported on specific port types only. Fix the driver to advertise autoneg based on the port type. Signed-off-by: Jitendra Kalsaria Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c b/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c index 6f316ab23257..0780e039b271 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_ethtool.c @@ -379,13 +379,13 @@ static int ql_get_settings(struct net_device *ndev, ecmd->supported = SUPPORTED_10000baseT_Full; ecmd->advertising = ADVERTISED_10000baseT_Full; - ecmd->autoneg = AUTONEG_ENABLE; ecmd->transceiver = XCVR_EXTERNAL; if ((qdev->link_status & STS_LINK_TYPE_MASK) == STS_LINK_TYPE_10GBASET) { ecmd->supported |= (SUPPORTED_TP | SUPPORTED_Autoneg); ecmd->advertising |= (ADVERTISED_TP | ADVERTISED_Autoneg); ecmd->port = PORT_TP; + ecmd->autoneg = AUTONEG_ENABLE; } else { ecmd->supported |= SUPPORTED_FIBRE; ecmd->advertising |= ADVERTISED_FIBRE; From e393ce57806cb208574abaec78decd5665416895 Mon Sep 17 00:00:00 2001 From: Jitendra Kalsaria Date: Thu, 18 Apr 2013 19:49:54 +0000 Subject: [PATCH 298/303] qlge: Update version to 1.00.00.32. Signed-off-by: Jitendra Kalsaria Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlge/qlge.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qlge/qlge.h b/drivers/net/ethernet/qlogic/qlge/qlge.h index a131d7b5d2fe..7e8d68263963 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge.h +++ b/drivers/net/ethernet/qlogic/qlge/qlge.h @@ -18,7 +18,7 @@ */ #define DRV_NAME "qlge" #define DRV_STRING "QLogic 10 Gigabit PCI-E Ethernet Driver " -#define DRV_VERSION "v1.00.00.31" +#define DRV_VERSION "v1.00.00.32" #define WQ_ADDR_ALIGN 0x3 /* 4 byte alignment */ From b261c20fe089859cc43642e7006921498327fe3d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 19 Apr 2013 08:47:26 +0000 Subject: [PATCH 299/303] net: ax88796: avoid 64 bit arithmetic When building ax88796 on an ARM platform with 64-bit resource_size_t, we currently get drivers/net/ethernet/8390/ax88796.c:875: undefined reference to `__aeabi_uldivmod' because we do a division on the length of the MMIO resource. Since we know that this resource is very short, using an "unsigned long" instead of "resource_size_t" is entirely sufficient, and avoids this link-time error. Cc: Ben Dooks Cc: netdev@vger.kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/8390/ax88796.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c index cab306a9888e..e1d26433d619 100644 --- a/drivers/net/ethernet/8390/ax88796.c +++ b/drivers/net/ethernet/8390/ax88796.c @@ -828,7 +828,7 @@ static int ax_probe(struct platform_device *pdev) struct ei_device *ei_local; struct ax_device *ax; struct resource *irq, *mem, *mem2; - resource_size_t mem_size, mem2_size = 0; + unsigned long mem_size, mem2_size = 0; int ret = 0; dev = ax__alloc_ei_netdev(sizeof(struct ax_device)); From c846ad9b880ece01bb4d8d07ba917734edf0324f Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Fri, 19 Apr 2013 10:45:52 +0000 Subject: [PATCH 300/303] net: rate-limit warn-bad-offload splats. If one does do something unfortunate and allow a bad offload bug into the kernel, this the skb_warn_bad_offload can effectively live-lock the system, filling the logs with the same error over and over. Add rate limitation to this so that box remains otherwise functional in this case. Signed-off-by: Ben Greear Signed-off-by: David S. Miller --- net/core/dev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index e7d68ed8aafe..b24ab0e98eb4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2148,6 +2148,9 @@ static void skb_warn_bad_offload(const struct sk_buff *skb) struct net_device *dev = skb->dev; const char *driver = ""; + if (!net_ratelimit()) + return; + if (dev && dev->dev.parent) driver = dev_driver_string(dev->dev.parent); From 74c3e3fcf350b2e7e3eaf9550528ee3f74e44b37 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 19 Apr 2013 16:36:03 -0700 Subject: [PATCH 301/303] x86, microcode: Verify the family before dispatching microcode patching For each CPU vendor that implements CPU microcode patching, there will be a minimum family for which this is implemented. Verify this minimum level of support. This can be done in the dispatch function or early in the application functions. Doing the latter turned out to be somewhat awkward because of the ineviable split between the BSP and the AP paths, and rather than pushing deep into the application functions, do this in the dispatch function. Reported-by: "Bryan O'Donoghue" Suggested-by: Borislav Petkov Cc: Fenghua Yu Link: http://lkml.kernel.org/r/1366392183-4149-1-git-send-email-bryan.odonoghue.lkml@nexus-software.ie --- arch/x86/kernel/microcode_core_early.c | 38 +++++++++++++++++++++----- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/microcode_core_early.c b/arch/x86/kernel/microcode_core_early.c index 577db8417d15..833d51d6ee06 100644 --- a/arch/x86/kernel/microcode_core_early.c +++ b/arch/x86/kernel/microcode_core_early.c @@ -45,9 +45,6 @@ static int __cpuinit x86_vendor(void) u32 eax = 0x00000000; u32 ebx, ecx = 0, edx; - if (!have_cpuid_p()) - return X86_VENDOR_UNKNOWN; - native_cpuid(&eax, &ebx, &ecx, &edx); if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx)) @@ -59,18 +56,45 @@ static int __cpuinit x86_vendor(void) return X86_VENDOR_UNKNOWN; } +static int __cpuinit x86_family(void) +{ + u32 eax = 0x00000001; + u32 ebx, ecx = 0, edx; + int x86; + + native_cpuid(&eax, &ebx, &ecx, &edx); + + x86 = (eax >> 8) & 0xf; + if (x86 == 15) + x86 += (eax >> 20) & 0xff; + + return x86; +} + void __init load_ucode_bsp(void) { - int vendor = x86_vendor(); + int vendor, x86; - if (vendor == X86_VENDOR_INTEL) + if (!have_cpuid_p()) + return; + + vendor = x86_vendor(); + x86 = x86_family(); + + if (vendor == X86_VENDOR_INTEL && x86 >= 6) load_ucode_intel_bsp(); } void __cpuinit load_ucode_ap(void) { - int vendor = x86_vendor(); + int vendor, x86; - if (vendor == X86_VENDOR_INTEL) + if (!have_cpuid_p()) + return; + + vendor = x86_vendor(); + x86 = x86_family(); + + if (vendor == X86_VENDOR_INTEL && x86 >= 6) load_ucode_intel_ap(); } From 83f1b4ba917db5dc5a061a44b3403ddb6e783494 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 19 Apr 2013 15:32:32 +0000 Subject: [PATCH 302/303] net: fix incorrect credentials passing Commit 257b5358b32f ("scm: Capture the full credentials of the scm sender") changed the credentials passing code to pass in the effective uid/gid instead of the real uid/gid. Obviously this doesn't matter most of the time (since normally they are the same), but it results in differences for suid binaries when the wrong uid/gid ends up being used. This just undoes that (presumably unintentional) part of the commit. Reported-by: Andy Lutomirski Cc: Eric W. Biederman Cc: Serge E. Hallyn Cc: David S. Miller Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds Acked-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/net/scm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/scm.h b/include/net/scm.h index 975cca01048b..b11708105681 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -56,8 +56,8 @@ static __inline__ void scm_set_cred(struct scm_cookie *scm, scm->pid = get_pid(pid); scm->cred = cred ? get_cred(cred) : NULL; scm->creds.pid = pid_vnr(pid); - scm->creds.uid = cred ? cred->euid : INVALID_UID; - scm->creds.gid = cred ? cred->egid : INVALID_GID; + scm->creds.uid = cred ? cred->uid : INVALID_UID; + scm->creds.gid = cred ? cred->gid : INVALID_GID; } static __inline__ void scm_destroy_cred(struct scm_cookie *scm) From 60d509fa6a9c4653a86ad830e4c4b30360b23f0e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 21 Apr 2013 14:38:45 -0700 Subject: [PATCH 303/303] Linux 3.9-rc8 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ac509b25b701..46263d808876 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 9 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = -rc8 NAME = Unicycling Gorilla # *DOCUMENTATION*