From d751c169e9a6f0f853346f1184881422bd10b3c2 Mon Sep 17 00:00:00 2001 From: Michael Davidson Date: Thu, 10 Oct 2013 18:39:54 -0700 Subject: [PATCH 001/981] x86, relocs: Add more per-cpu gold special cases The "gold" linker doesn't seem to put some additional per-cpu cases in the right place. Add these to the per-cpu check. Without this, the kASLR patch series fails to correctly apply relocations, and fails to boot. Signed-off-by: Michael Davidson Signed-off-by: Kees Cook Link: http://lkml.kernel.org/r/20131011013954.GA28902@www.outflux.net Signed-off-by: H. Peter Anvin --- arch/x86/tools/relocs.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index f7bab68a4b83..71a2533c90d3 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -722,15 +722,23 @@ static void percpu_init(void) /* * Check to see if a symbol lies in the .data..percpu section. - * For some as yet not understood reason the "__init_begin" - * symbol which immediately preceeds the .data..percpu section - * also shows up as it it were part of it so we do an explict - * check for that symbol name and ignore it. + * + * The linker incorrectly associates some symbols with the + * .data..percpu section so we also need to check the symbol + * name to make sure that we classify the symbol correctly. + * + * The GNU linker incorrectly associates: + * __init_begin + * + * The "gold" linker incorrectly associates: + * init_per_cpu__irq_stack_union + * init_per_cpu__gdt_page */ static int is_percpu_sym(ElfW(Sym) *sym, const char *symname) { return (sym->st_shndx == per_cpu_shndx) && - strcmp(symname, "__init_begin"); + strcmp(symname, "__init_begin") && + strncmp(symname, "init_per_cpu_", 13); } From dd78b97367bd575918204cc89107c1479d3fc1a7 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 10 Oct 2013 17:18:13 -0700 Subject: [PATCH 002/981] x86, boot: Move CPU flags out of cpucheck Refactor the CPU flags handling out of the cpucheck routines so that they can be reused by the future ASLR routines (in order to detect CPU features like RDRAND and RDTSC). This reworks has_eflag() and has_fpu() to be used on both 32-bit and 64-bit, and refactors the calls to cpuid to make them PIC-safe on 32-bit. Signed-off-by: Kees Cook Link: http://lkml.kernel.org/r/1381450698-28710-2-git-send-email-keescook@chromium.org Signed-off-by: H. Peter Anvin --- arch/x86/boot/Makefile | 2 +- arch/x86/boot/boot.h | 10 +-- arch/x86/boot/compressed/Makefile | 2 +- arch/x86/boot/compressed/cpuflags.c | 12 ++++ arch/x86/boot/cpucheck.c | 86 ----------------------- arch/x86/boot/cpuflags.c | 104 ++++++++++++++++++++++++++++ arch/x86/boot/cpuflags.h | 19 +++++ 7 files changed, 138 insertions(+), 97 deletions(-) create mode 100644 arch/x86/boot/compressed/cpuflags.c create mode 100644 arch/x86/boot/cpuflags.c create mode 100644 arch/x86/boot/cpuflags.h diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 379814bc41e3..0da2e37b37c3 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -20,7 +20,7 @@ targets := vmlinux.bin setup.bin setup.elf bzImage targets += fdimage fdimage144 fdimage288 image.iso mtools.conf subdir- := compressed -setup-y += a20.o bioscall.o cmdline.o copy.o cpu.o cpucheck.o +setup-y += a20.o bioscall.o cmdline.o copy.o cpu.o cpuflags.o cpucheck.o setup-y += early_serial_console.o edd.o header.o main.o mca.o memory.o setup-y += pm.o pmjump.o printf.o regs.o string.o tty.o video.o setup-y += video-mode.o version.o diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index ef72baeff484..50f8c5e0f37e 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -26,9 +26,8 @@ #include #include #include "bitops.h" -#include -#include #include "ctype.h" +#include "cpuflags.h" /* Useful macros */ #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) @@ -307,14 +306,7 @@ static inline int cmdline_find_option_bool(const char *option) return __cmdline_find_option_bool(cmd_line_ptr, option); } - /* cpu.c, cpucheck.c */ -struct cpu_features { - int level; /* Family, or 64 for x86-64 */ - int model; - u32 flags[NCAPINTS]; -}; -extern struct cpu_features cpu; int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr); int validate_cpu(void); diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index dcd90df10ab4..3312f1be9fd9 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -27,7 +27,7 @@ HOST_EXTRACFLAGS += -I$(srctree)/tools/include VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \ $(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \ - $(obj)/piggy.o + $(obj)/piggy.o $(obj)/cpuflags.o $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone diff --git a/arch/x86/boot/compressed/cpuflags.c b/arch/x86/boot/compressed/cpuflags.c new file mode 100644 index 000000000000..931cba6a4bb0 --- /dev/null +++ b/arch/x86/boot/compressed/cpuflags.c @@ -0,0 +1,12 @@ +#ifdef CONFIG_RANDOMIZE_BASE + +#include "../cpuflags.c" + +bool has_cpuflag(int flag) +{ + get_flags(); + + return test_bit(flag, cpu.flags); +} + +#endif diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c index 4d3ff037201f..e1f3c166a512 100644 --- a/arch/x86/boot/cpucheck.c +++ b/arch/x86/boot/cpucheck.c @@ -28,8 +28,6 @@ #include #include -struct cpu_features cpu; -static u32 cpu_vendor[3]; static u32 err_flags[NCAPINTS]; static const int req_level = CONFIG_X86_MINIMUM_CPU_FAMILY; @@ -69,90 +67,6 @@ static int is_transmeta(void) cpu_vendor[2] == A32('M', 'x', '8', '6'); } -static int has_fpu(void) -{ - u16 fcw = -1, fsw = -1; - u32 cr0; - - asm("movl %%cr0,%0" : "=r" (cr0)); - if (cr0 & (X86_CR0_EM|X86_CR0_TS)) { - cr0 &= ~(X86_CR0_EM|X86_CR0_TS); - asm volatile("movl %0,%%cr0" : : "r" (cr0)); - } - - asm volatile("fninit ; fnstsw %0 ; fnstcw %1" - : "+m" (fsw), "+m" (fcw)); - - return fsw == 0 && (fcw & 0x103f) == 0x003f; -} - -static int has_eflag(u32 mask) -{ - u32 f0, f1; - - asm("pushfl ; " - "pushfl ; " - "popl %0 ; " - "movl %0,%1 ; " - "xorl %2,%1 ; " - "pushl %1 ; " - "popfl ; " - "pushfl ; " - "popl %1 ; " - "popfl" - : "=&r" (f0), "=&r" (f1) - : "ri" (mask)); - - return !!((f0^f1) & mask); -} - -static void get_flags(void) -{ - u32 max_intel_level, max_amd_level; - u32 tfms; - - if (has_fpu()) - set_bit(X86_FEATURE_FPU, cpu.flags); - - if (has_eflag(X86_EFLAGS_ID)) { - asm("cpuid" - : "=a" (max_intel_level), - "=b" (cpu_vendor[0]), - "=d" (cpu_vendor[1]), - "=c" (cpu_vendor[2]) - : "a" (0)); - - if (max_intel_level >= 0x00000001 && - max_intel_level <= 0x0000ffff) { - asm("cpuid" - : "=a" (tfms), - "=c" (cpu.flags[4]), - "=d" (cpu.flags[0]) - : "a" (0x00000001) - : "ebx"); - cpu.level = (tfms >> 8) & 15; - cpu.model = (tfms >> 4) & 15; - if (cpu.level >= 6) - cpu.model += ((tfms >> 16) & 0xf) << 4; - } - - asm("cpuid" - : "=a" (max_amd_level) - : "a" (0x80000000) - : "ebx", "ecx", "edx"); - - if (max_amd_level >= 0x80000001 && - max_amd_level <= 0x8000ffff) { - u32 eax = 0x80000001; - asm("cpuid" - : "+a" (eax), - "=c" (cpu.flags[6]), - "=d" (cpu.flags[1]) - : : "ebx"); - } - } -} - /* Returns a bitmask of which words we have error bits in */ static int check_flags(void) { diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c new file mode 100644 index 000000000000..b02544a2bce0 --- /dev/null +++ b/arch/x86/boot/cpuflags.c @@ -0,0 +1,104 @@ +#include +#include "bitops.h" + +#include +#include +#include +#include "cpuflags.h" + +struct cpu_features cpu; +u32 cpu_vendor[3]; + +static bool loaded_flags; + +static int has_fpu(void) +{ + u16 fcw = -1, fsw = -1; + unsigned long cr0; + + asm volatile("mov %%cr0,%0" : "=r" (cr0)); + if (cr0 & (X86_CR0_EM|X86_CR0_TS)) { + cr0 &= ~(X86_CR0_EM|X86_CR0_TS); + asm volatile("mov %0,%%cr0" : : "r" (cr0)); + } + + asm volatile("fninit ; fnstsw %0 ; fnstcw %1" + : "+m" (fsw), "+m" (fcw)); + + return fsw == 0 && (fcw & 0x103f) == 0x003f; +} + +int has_eflag(unsigned long mask) +{ + unsigned long f0, f1; + + asm volatile("pushf \n\t" + "pushf \n\t" + "pop %0 \n\t" + "mov %0,%1 \n\t" + "xor %2,%1 \n\t" + "push %1 \n\t" + "popf \n\t" + "pushf \n\t" + "pop %1 \n\t" + "popf" + : "=&r" (f0), "=&r" (f1) + : "ri" (mask)); + + return !!((f0^f1) & mask); +} + +/* Handle x86_32 PIC using ebx. */ +#if defined(__i386__) && defined(__PIC__) +# define EBX_REG "=r" +#else +# define EBX_REG "=b" +#endif + +static inline void cpuid(u32 id, u32 *a, u32 *b, u32 *c, u32 *d) +{ + asm volatile(".ifnc %%ebx,%3 ; movl %%ebx,%3 ; .endif \n\t" + "cpuid \n\t" + ".ifnc %%ebx,%3 ; xchgl %%ebx,%3 ; .endif \n\t" + : "=a" (*a), "=c" (*c), "=d" (*d), EBX_REG (*b) + : "a" (id) + ); +} + +void get_flags(void) +{ + u32 max_intel_level, max_amd_level; + u32 tfms; + u32 ignored; + + if (loaded_flags) + return; + loaded_flags = true; + + if (has_fpu()) + set_bit(X86_FEATURE_FPU, cpu.flags); + + if (has_eflag(X86_EFLAGS_ID)) { + cpuid(0x0, &max_intel_level, &cpu_vendor[0], &cpu_vendor[2], + &cpu_vendor[1]); + + if (max_intel_level >= 0x00000001 && + max_intel_level <= 0x0000ffff) { + cpuid(0x1, &tfms, &ignored, &cpu.flags[4], + &cpu.flags[0]); + cpu.level = (tfms >> 8) & 15; + cpu.model = (tfms >> 4) & 15; + if (cpu.level >= 6) + cpu.model += ((tfms >> 16) & 0xf) << 4; + } + + cpuid(0x80000000, &max_amd_level, &ignored, &ignored, + &ignored); + + if (max_amd_level >= 0x80000001 && + max_amd_level <= 0x8000ffff) { + cpuid(0x80000001, &ignored, &ignored, &cpu.flags[6], + &cpu.flags[1]); + } + } +} diff --git a/arch/x86/boot/cpuflags.h b/arch/x86/boot/cpuflags.h new file mode 100644 index 000000000000..9bb4e25f7317 --- /dev/null +++ b/arch/x86/boot/cpuflags.h @@ -0,0 +1,19 @@ +#ifndef BOOT_CPUFLAGS_H +#define BOOT_CPUFLAGS_H + +#include +#include + +struct cpu_features { + int level; /* Family, or 64 for x86-64 */ + int model; + u32 flags[NCAPINTS]; +}; + +extern struct cpu_features cpu; +extern u32 cpu_vendor[3]; + +int has_eflag(unsigned long mask); +void get_flags(void); + +#endif From 8ab3820fd5b2896d66da7bb2a906bc382e63e7bc Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 10 Oct 2013 17:18:14 -0700 Subject: [PATCH 003/981] x86, kaslr: Return location from decompress_kernel This allows decompress_kernel to return a new location for the kernel to be relocated to. Additionally, enforces CONFIG_PHYSICAL_START as the minimum relocation position when building with CONFIG_RELOCATABLE. With CONFIG_RANDOMIZE_BASE set, the choose_kernel_location routine will select a new location to decompress the kernel, though here it is presently a no-op. The kernel command line option "nokaslr" is introduced to bypass these routines. Signed-off-by: Kees Cook Link: http://lkml.kernel.org/r/1381450698-28710-3-git-send-email-keescook@chromium.org Signed-off-by: H. Peter Anvin --- Documentation/kernel-parameters.txt | 4 +++ arch/x86/Kconfig | 38 ++++++++++++++++++++++++++--- arch/x86/boot/compressed/Makefile | 2 +- arch/x86/boot/compressed/aslr.c | 23 +++++++++++++++++ arch/x86/boot/compressed/cmdline.c | 2 +- arch/x86/boot/compressed/head_32.S | 10 +++++--- arch/x86/boot/compressed/head_64.S | 16 +++++++----- arch/x86/boot/compressed/misc.c | 8 ++++-- arch/x86/boot/compressed/misc.h | 27 +++++++++++++++----- 9 files changed, 106 insertions(+), 24 deletions(-) create mode 100644 arch/x86/boot/compressed/aslr.c diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index fcbb736d55fe..773fc4c077b4 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1975,6 +1975,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. noapic [SMP,APIC] Tells the kernel to not make use of any IOAPICs that may be present in the system. + nokaslr [X86] + Disable kernel base offset ASLR (Address Space + Layout Randomization) if built into the kernel. + noautogroup Disable scheduler automatic task group creation. nobats [PPC] Do not use BATs for mapping kernel lowmem diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ee2fb9d37745..992701d4d4f8 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1722,16 +1722,46 @@ config RELOCATABLE Note: If CONFIG_RELOCATABLE=y, then the kernel runs from the address it has been loaded at and the compile time physical address - (CONFIG_PHYSICAL_START) is ignored. + (CONFIG_PHYSICAL_START) is used as the minimum location. -# Relocation on x86-32 needs some additional build support +config RANDOMIZE_BASE + bool "Randomize the address of the kernel image" + depends on RELOCATABLE + depends on !HIBERNATION + default n + ---help--- + Randomizes the physical and virtual address at which the + kernel image is decompressed, as a security feature that + deters exploit attempts relying on knowledge of the location + of kernel internals. + + Entropy is generated using the RDRAND instruction if it + is supported. If not, then RDTSC is used, if supported. If + neither RDRAND nor RDTSC are supported, then no randomness + is introduced. + + The kernel will be offset by up to RANDOMIZE_BASE_MAX_OFFSET, + and aligned according to PHYSICAL_ALIGN. + +config RANDOMIZE_BASE_MAX_OFFSET + hex "Maximum ASLR offset allowed" + depends on RANDOMIZE_BASE + default "0x10000000" + range 0x0 0x10000000 + ---help--- + Determines the maximal offset in bytes that will be applied to the + kernel when Address Space Layout Randomization (ASLR) is active. + Must be less than or equal to the actual physical memory on the + system. This must be a power of two. + +# Relocation on x86 needs some additional build support config X86_NEED_RELOCS def_bool y - depends on X86_32 && RELOCATABLE + depends on RANDOMIZE_BASE || (X86_32 && RELOCATABLE) config PHYSICAL_ALIGN hex "Alignment value to which kernel should be aligned" - default "0x1000000" + default "0x200000" range 0x2000 0x1000000 if X86_32 range 0x200000 0x1000000 if X86_64 ---help--- diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 3312f1be9fd9..ae8b5dbbd8c5 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -27,7 +27,7 @@ HOST_EXTRACFLAGS += -I$(srctree)/tools/include VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \ $(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \ - $(obj)/piggy.o $(obj)/cpuflags.o + $(obj)/piggy.o $(obj)/cpuflags.o $(obj)/aslr.o $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c new file mode 100644 index 000000000000..b73cc66d201e --- /dev/null +++ b/arch/x86/boot/compressed/aslr.c @@ -0,0 +1,23 @@ +#include "misc.h" + +#ifdef CONFIG_RANDOMIZE_BASE + +unsigned char *choose_kernel_location(unsigned char *input, + unsigned long input_size, + unsigned char *output, + unsigned long output_size) +{ + unsigned long choice = (unsigned long)output; + + if (cmdline_find_option_bool("nokaslr")) { + debug_putstr("KASLR disabled...\n"); + goto out; + } + + /* XXX: choose random location. */ + +out: + return (unsigned char *)choice; +} + +#endif /* CONFIG_RANDOMIZE_BASE */ diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c index bffd73b45b1f..b68e3033e6b9 100644 --- a/arch/x86/boot/compressed/cmdline.c +++ b/arch/x86/boot/compressed/cmdline.c @@ -1,6 +1,6 @@ #include "misc.h" -#ifdef CONFIG_EARLY_PRINTK +#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE static unsigned long fs; static inline void set_fs(unsigned long seg) diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 5d6f6891b188..9116aac232c7 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -117,9 +117,11 @@ preferred_addr: addl %eax, %ebx notl %eax andl %eax, %ebx -#else - movl $LOAD_PHYSICAL_ADDR, %ebx + cmpl $LOAD_PHYSICAL_ADDR, %ebx + jge 1f #endif + movl $LOAD_PHYSICAL_ADDR, %ebx +1: /* Target address to relocate to for decompression */ addl $z_extract_offset, %ebx @@ -191,14 +193,14 @@ relocated: leal boot_heap(%ebx), %eax pushl %eax /* heap area */ pushl %esi /* real mode pointer */ - call decompress_kernel + call decompress_kernel /* returns kernel location in %eax */ addl $24, %esp /* * Jump to the decompressed kernel. */ xorl %ebx, %ebx - jmp *%ebp + jmp *%eax /* * Stack and heap for uncompression diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index c337422b575d..c5c1ae0997e7 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -94,9 +94,11 @@ ENTRY(startup_32) addl %eax, %ebx notl %eax andl %eax, %ebx -#else - movl $LOAD_PHYSICAL_ADDR, %ebx + cmpl $LOAD_PHYSICAL_ADDR, %ebx + jge 1f #endif + movl $LOAD_PHYSICAL_ADDR, %ebx +1: /* Target address to relocate to for decompression */ addl $z_extract_offset, %ebx @@ -269,9 +271,11 @@ preferred_addr: addq %rax, %rbp notq %rax andq %rax, %rbp -#else - movq $LOAD_PHYSICAL_ADDR, %rbp + cmpq $LOAD_PHYSICAL_ADDR, %rbp + jge 1f #endif + movq $LOAD_PHYSICAL_ADDR, %rbp +1: /* Target address to relocate to for decompression */ leaq z_extract_offset(%rbp), %rbx @@ -339,13 +343,13 @@ relocated: movl $z_input_len, %ecx /* input_len */ movq %rbp, %r8 /* output target address */ movq $z_output_len, %r9 /* decompressed length */ - call decompress_kernel + call decompress_kernel /* returns kernel location in %rax */ popq %rsi /* * Jump to the decompressed kernel. */ - jmp *%rbp + jmp *%rax .code32 no_longmode: diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 434f077d2c4d..71387685dc16 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -395,7 +395,7 @@ static void parse_elf(void *output) free(phdrs); } -asmlinkage void decompress_kernel(void *rmode, memptr heap, +asmlinkage void *decompress_kernel(void *rmode, memptr heap, unsigned char *input_data, unsigned long input_len, unsigned char *output, @@ -422,6 +422,10 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, free_mem_ptr = heap; /* Heap */ free_mem_end_ptr = heap + BOOT_HEAP_SIZE; + output = choose_kernel_location(input_data, input_len, + output, output_len); + + /* Validate memory location choices. */ if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1)) error("Destination address inappropriately aligned"); #ifdef CONFIG_X86_64 @@ -441,5 +445,5 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, parse_elf(output); handle_relocations(output, output_len); debug_putstr("done.\nBooting the kernel.\n"); - return; + return output; } diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 674019d8e235..9077af7fd0b8 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -39,23 +39,38 @@ static inline void debug_putstr(const char *s) #endif -#ifdef CONFIG_EARLY_PRINTK - +#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE /* cmdline.c */ int cmdline_find_option(const char *option, char *buffer, int bufsize); int cmdline_find_option_bool(const char *option); +#endif + +#if CONFIG_RANDOMIZE_BASE +/* aslr.c */ +unsigned char *choose_kernel_location(unsigned char *input, + unsigned long input_size, + unsigned char *output, + unsigned long output_size); +#else +static inline +unsigned char *choose_kernel_location(unsigned char *input, + unsigned long input_size, + unsigned char *output, + unsigned long output_size) +{ + return output; +} +#endif + +#ifdef CONFIG_EARLY_PRINTK /* early_serial_console.c */ extern int early_serial_base; void console_init(void); - #else - -/* early_serial_console.c */ static const int early_serial_base; static inline void console_init(void) { } - #endif #endif From 5bfce5ef55cbe78ee2ee6e97f2e26a8a582008f3 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 10 Oct 2013 17:18:15 -0700 Subject: [PATCH 004/981] x86, kaslr: Provide randomness functions Adds potential sources of randomness: RDRAND, RDTSC, or the i8254. This moves the pre-alternatives inline rdrand function into the header so both pieces of code can use it. Availability of RDRAND is then controlled by CONFIG_ARCH_RANDOM, if someone wants to disable it even for kASLR. Signed-off-by: Kees Cook Link: http://lkml.kernel.org/r/1381450698-28710-4-git-send-email-keescook@chromium.org Signed-off-by: H. Peter Anvin --- arch/x86/boot/compressed/aslr.c | 53 +++++++++++++++++++++++++++++++ arch/x86/boot/compressed/misc.h | 2 ++ arch/x86/include/asm/archrandom.h | 21 ++++++++++++ arch/x86/kernel/cpu/rdrand.c | 14 -------- 4 files changed, 76 insertions(+), 14 deletions(-) diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c index b73cc66d201e..14b24e0e5496 100644 --- a/arch/x86/boot/compressed/aslr.c +++ b/arch/x86/boot/compressed/aslr.c @@ -1,6 +1,59 @@ #include "misc.h" #ifdef CONFIG_RANDOMIZE_BASE +#include +#include + +#define I8254_PORT_CONTROL 0x43 +#define I8254_PORT_COUNTER0 0x40 +#define I8254_CMD_READBACK 0xC0 +#define I8254_SELECT_COUNTER0 0x02 +#define I8254_STATUS_NOTREADY 0x40 +static inline u16 i8254(void) +{ + u16 status, timer; + + do { + outb(I8254_PORT_CONTROL, + I8254_CMD_READBACK | I8254_SELECT_COUNTER0); + status = inb(I8254_PORT_COUNTER0); + timer = inb(I8254_PORT_COUNTER0); + timer |= inb(I8254_PORT_COUNTER0) << 8; + } while (status & I8254_STATUS_NOTREADY); + + return timer; +} + +static unsigned long get_random_long(void) +{ + unsigned long random; + + if (has_cpuflag(X86_FEATURE_RDRAND)) { + debug_putstr("KASLR using RDRAND...\n"); + if (rdrand_long(&random)) + return random; + } + + if (has_cpuflag(X86_FEATURE_TSC)) { + uint32_t raw; + + debug_putstr("KASLR using RDTSC...\n"); + rdtscl(raw); + + /* Only use the low bits of rdtsc. */ + random = raw & 0xffff; + } else { + debug_putstr("KASLR using i8254...\n"); + random = i8254(); + } + + /* Extend timer bits poorly... */ + random |= (random << 16); +#ifdef CONFIG_X86_64 + random |= (random << 32); +#endif + return random; +} unsigned char *choose_kernel_location(unsigned char *input, unsigned long input_size, diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 9077af7fd0b8..0782eb0b6e30 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -52,6 +52,8 @@ unsigned char *choose_kernel_location(unsigned char *input, unsigned long input_size, unsigned char *output, unsigned long output_size); +/* cpuflags.c */ +bool has_cpuflag(int flag); #else static inline unsigned char *choose_kernel_location(unsigned char *input, diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h index 0d9ec770f2f8..e6a92455740e 100644 --- a/arch/x86/include/asm/archrandom.h +++ b/arch/x86/include/asm/archrandom.h @@ -39,6 +39,20 @@ #ifdef CONFIG_ARCH_RANDOM +/* Instead of arch_get_random_long() when alternatives haven't run. */ +static inline int rdrand_long(unsigned long *v) +{ + int ok; + asm volatile("1: " RDRAND_LONG "\n\t" + "jc 2f\n\t" + "decl %0\n\t" + "jnz 1b\n\t" + "2:" + : "=r" (ok), "=a" (*v) + : "0" (RDRAND_RETRY_LOOPS)); + return ok; +} + #define GET_RANDOM(name, type, rdrand, nop) \ static inline int name(type *v) \ { \ @@ -68,6 +82,13 @@ GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP3); #endif /* CONFIG_X86_64 */ +#else + +static inline int rdrand_long(unsigned long *v) +{ + return 0; +} + #endif /* CONFIG_ARCH_RANDOM */ extern void x86_init_rdrand(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c index 88db010845cb..384df5105fbc 100644 --- a/arch/x86/kernel/cpu/rdrand.c +++ b/arch/x86/kernel/cpu/rdrand.c @@ -31,20 +31,6 @@ static int __init x86_rdrand_setup(char *s) } __setup("nordrand", x86_rdrand_setup); -/* We can't use arch_get_random_long() here since alternatives haven't run */ -static inline int rdrand_long(unsigned long *v) -{ - int ok; - asm volatile("1: " RDRAND_LONG "\n\t" - "jc 2f\n\t" - "decl %0\n\t" - "jnz 1b\n\t" - "2:" - : "=r" (ok), "=a" (*v) - : "0" (RDRAND_RETRY_LOOPS)); - return ok; -} - /* * Force a reseed cycle; we are architecturally guaranteed a reseed * after no more than 512 128-bit chunks of random data. This also From 82fa9637a2ba285bcc7c5050c73010b2c1b3d803 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 10 Oct 2013 17:18:16 -0700 Subject: [PATCH 005/981] x86, kaslr: Select random position from e820 maps Counts available alignment positions across all e820 maps, and chooses one randomly for the new kernel base address, making sure not to collide with unsafe memory areas. Signed-off-by: Kees Cook Link: http://lkml.kernel.org/r/1381450698-28710-5-git-send-email-keescook@chromium.org Signed-off-by: H. Peter Anvin --- arch/x86/boot/compressed/aslr.c | 193 +++++++++++++++++++++++++++++++- arch/x86/boot/compressed/misc.c | 10 +- arch/x86/boot/compressed/misc.h | 8 ++ 3 files changed, 202 insertions(+), 9 deletions(-) diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c index 14b24e0e5496..05957986d123 100644 --- a/arch/x86/boot/compressed/aslr.c +++ b/arch/x86/boot/compressed/aslr.c @@ -3,6 +3,7 @@ #ifdef CONFIG_RANDOMIZE_BASE #include #include +#include #define I8254_PORT_CONTROL 0x43 #define I8254_PORT_COUNTER0 0x40 @@ -55,20 +56,210 @@ static unsigned long get_random_long(void) return random; } +struct mem_vector { + unsigned long start; + unsigned long size; +}; + +#define MEM_AVOID_MAX 5 +struct mem_vector mem_avoid[MEM_AVOID_MAX]; + +static bool mem_contains(struct mem_vector *region, struct mem_vector *item) +{ + /* Item at least partially before region. */ + if (item->start < region->start) + return false; + /* Item at least partially after region. */ + if (item->start + item->size > region->start + region->size) + return false; + return true; +} + +static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two) +{ + /* Item one is entirely before item two. */ + if (one->start + one->size <= two->start) + return false; + /* Item one is entirely after item two. */ + if (one->start >= two->start + two->size) + return false; + return true; +} + +static void mem_avoid_init(unsigned long input, unsigned long input_size, + unsigned long output, unsigned long output_size) +{ + u64 initrd_start, initrd_size; + u64 cmd_line, cmd_line_size; + unsigned long unsafe, unsafe_len; + char *ptr; + + /* + * Avoid the region that is unsafe to overlap during + * decompression (see calculations at top of misc.c). + */ + unsafe_len = (output_size >> 12) + 32768 + 18; + unsafe = (unsigned long)input + input_size - unsafe_len; + mem_avoid[0].start = unsafe; + mem_avoid[0].size = unsafe_len; + + /* Avoid initrd. */ + initrd_start = (u64)real_mode->ext_ramdisk_image << 32; + initrd_start |= real_mode->hdr.ramdisk_image; + initrd_size = (u64)real_mode->ext_ramdisk_size << 32; + initrd_size |= real_mode->hdr.ramdisk_size; + mem_avoid[1].start = initrd_start; + mem_avoid[1].size = initrd_size; + + /* Avoid kernel command line. */ + cmd_line = (u64)real_mode->ext_cmd_line_ptr << 32; + cmd_line |= real_mode->hdr.cmd_line_ptr; + /* Calculate size of cmd_line. */ + ptr = (char *)(unsigned long)cmd_line; + for (cmd_line_size = 0; ptr[cmd_line_size++]; ) + ; + mem_avoid[2].start = cmd_line; + mem_avoid[2].size = cmd_line_size; + + /* Avoid heap memory. */ + mem_avoid[3].start = (unsigned long)free_mem_ptr; + mem_avoid[3].size = BOOT_HEAP_SIZE; + + /* Avoid stack memory. */ + mem_avoid[4].start = (unsigned long)free_mem_end_ptr; + mem_avoid[4].size = BOOT_STACK_SIZE; +} + +/* Does this memory vector overlap a known avoided area? */ +bool mem_avoid_overlap(struct mem_vector *img) +{ + int i; + + for (i = 0; i < MEM_AVOID_MAX; i++) { + if (mem_overlaps(img, &mem_avoid[i])) + return true; + } + + return false; +} + +unsigned long slots[CONFIG_RANDOMIZE_BASE_MAX_OFFSET / CONFIG_PHYSICAL_ALIGN]; +unsigned long slot_max = 0; + +static void slots_append(unsigned long addr) +{ + /* Overflowing the slots list should be impossible. */ + if (slot_max >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET / + CONFIG_PHYSICAL_ALIGN) + return; + + slots[slot_max++] = addr; +} + +static unsigned long slots_fetch_random(void) +{ + /* Handle case of no slots stored. */ + if (slot_max == 0) + return 0; + + return slots[get_random_long() % slot_max]; +} + +static void process_e820_entry(struct e820entry *entry, + unsigned long minimum, + unsigned long image_size) +{ + struct mem_vector region, img; + + /* Skip non-RAM entries. */ + if (entry->type != E820_RAM) + return; + + /* Ignore entries entirely above our maximum. */ + if (entry->addr >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET) + return; + + /* Ignore entries entirely below our minimum. */ + if (entry->addr + entry->size < minimum) + return; + + region.start = entry->addr; + region.size = entry->size; + + /* Potentially raise address to minimum location. */ + if (region.start < minimum) + region.start = minimum; + + /* Potentially raise address to meet alignment requirements. */ + region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN); + + /* Did we raise the address above the bounds of this e820 region? */ + if (region.start > entry->addr + entry->size) + return; + + /* Reduce size by any delta from the original address. */ + region.size -= region.start - entry->addr; + + /* Reduce maximum size to fit end of image within maximum limit. */ + if (region.start + region.size > CONFIG_RANDOMIZE_BASE_MAX_OFFSET) + region.size = CONFIG_RANDOMIZE_BASE_MAX_OFFSET - region.start; + + /* Walk each aligned slot and check for avoided areas. */ + for (img.start = region.start, img.size = image_size ; + mem_contains(®ion, &img) ; + img.start += CONFIG_PHYSICAL_ALIGN) { + if (mem_avoid_overlap(&img)) + continue; + slots_append(img.start); + } +} + +static unsigned long find_random_addr(unsigned long minimum, + unsigned long size) +{ + int i; + unsigned long addr; + + /* Make sure minimum is aligned. */ + minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN); + + /* Verify potential e820 positions, appending to slots list. */ + for (i = 0; i < real_mode->e820_entries; i++) { + process_e820_entry(&real_mode->e820_map[i], minimum, size); + } + + return slots_fetch_random(); +} + unsigned char *choose_kernel_location(unsigned char *input, unsigned long input_size, unsigned char *output, unsigned long output_size) { unsigned long choice = (unsigned long)output; + unsigned long random; if (cmdline_find_option_bool("nokaslr")) { debug_putstr("KASLR disabled...\n"); goto out; } - /* XXX: choose random location. */ + /* Record the various known unsafe memory ranges. */ + mem_avoid_init((unsigned long)input, input_size, + (unsigned long)output, output_size); + /* Walk e820 and find a random address. */ + random = find_random_addr(choice, output_size); + if (!random) { + debug_putstr("KASLR could not find suitable E820 region...\n"); + goto out; + } + + /* Always enforce the minimum. */ + if (random < choice) + goto out; + + choice = random; out: return (unsigned char *)choice; } diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 71387685dc16..196eaf373a06 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -112,14 +112,8 @@ struct boot_params *real_mode; /* Pointer to real-mode data */ void *memset(void *s, int c, size_t n); void *memcpy(void *dest, const void *src, size_t n); -#ifdef CONFIG_X86_64 -#define memptr long -#else -#define memptr unsigned -#endif - -static memptr free_mem_ptr; -static memptr free_mem_end_ptr; +memptr free_mem_ptr; +memptr free_mem_end_ptr; static char *vidmem; static int vidport; diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 0782eb0b6e30..24e3e569a13c 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -23,7 +23,15 @@ #define BOOT_BOOT_H #include "../ctype.h" +#ifdef CONFIG_X86_64 +#define memptr long +#else +#define memptr unsigned +#endif + /* misc.c */ +extern memptr free_mem_ptr; +extern memptr free_mem_end_ptr; extern struct boot_params *real_mode; /* Pointer to real-mode data */ void __putstr(const char *s); #define error_putstr(__x) __putstr(__x) From f32360ef6608434a032dc7ad262d45e9693c27f3 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 10 Oct 2013 17:18:17 -0700 Subject: [PATCH 006/981] x86, kaslr: Report kernel offset on panic When the system panics, include the kernel offset in the report to assist in debugging. Signed-off-by: Kees Cook Link: http://lkml.kernel.org/r/1381450698-28710-6-git-send-email-keescook@chromium.org Signed-off-by: H. Peter Anvin --- arch/x86/kernel/setup.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f0de6294b955..1708862fc40d 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -823,6 +823,20 @@ static void __init trim_low_memory_range(void) memblock_reserve(0, ALIGN(reserve_low, PAGE_SIZE)); } +/* + * Dump out kernel offset information on panic. + */ +static int +dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p) +{ + pr_emerg("Kernel Offset: 0x%lx from 0x%lx " + "(relocation range: 0x%lx-0x%lx)\n", + (unsigned long)&_text - __START_KERNEL, __START_KERNEL, + __START_KERNEL_map, MODULES_VADDR-1); + + return 0; +} + /* * Determine if we were loaded by an EFI loader. If so, then we have also been * passed the efi memmap, systab, etc., so we should use these data structures @@ -1242,3 +1256,15 @@ void __init i386_reserve_resources(void) } #endif /* CONFIG_X86_32 */ + +static struct notifier_block kernel_offset_notifier = { + .notifier_call = dump_kernel_offset +}; + +static int __init register_kernel_offset_dumper(void) +{ + atomic_notifier_chain_register(&panic_notifier_list, + &kernel_offset_notifier); + return 0; +} +__initcall(register_kernel_offset_dumper); From 6145cfe394a7f138f6b64491c5663f97dba12450 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 10 Oct 2013 17:18:18 -0700 Subject: [PATCH 007/981] x86, kaslr: Raise the maximum virtual address to -1 GiB on x86_64 On 64-bit, this raises the maximum location to -1 GiB (from -1.5 GiB), the upper limit currently, since the kernel fixmap page mappings need to be moved to use the other 1 GiB (which would be the theoretical limit when building with -mcmodel=kernel). Signed-off-by: Kees Cook Link: http://lkml.kernel.org/r/1381450698-28710-7-git-send-email-keescook@chromium.org Signed-off-by: H. Peter Anvin --- arch/x86/Kconfig | 16 +++++++++++++--- arch/x86/include/asm/page_64_types.h | 15 ++++++++++++--- arch/x86/include/asm/pgtable_64_types.h | 2 +- arch/x86/mm/init_32.c | 3 +++ 4 files changed, 29 insertions(+), 7 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 992701d4d4f8..51f439953d23 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1746,13 +1746,23 @@ config RANDOMIZE_BASE config RANDOMIZE_BASE_MAX_OFFSET hex "Maximum ASLR offset allowed" depends on RANDOMIZE_BASE - default "0x10000000" - range 0x0 0x10000000 + range 0x0 0x20000000 if X86_32 + default "0x20000000" if X86_32 + range 0x0 0x40000000 if X86_64 + default "0x40000000" if X86_64 ---help--- Determines the maximal offset in bytes that will be applied to the kernel when Address Space Layout Randomization (ASLR) is active. Must be less than or equal to the actual physical memory on the - system. This must be a power of two. + system. This must be a multiple of CONFIG_PHYSICAL_ALIGN. + + On 32-bit this is limited to 512MiB. + + On 64-bit this is limited by how the kernel fixmap page table is + positioned, so this cannot be larger that 1GiB currently. Normally + there is a 512MiB to 1.5GiB split between kernel and modules. When + this is raised above the 512MiB default, the modules area will + shrink to compensate, up to the current maximum 1GiB to 1GiB split. # Relocation on x86 needs some additional build support config X86_NEED_RELOCS diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 43dcd804ebd5..8de6d9cf3b95 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -39,9 +39,18 @@ #define __VIRTUAL_MASK_SHIFT 47 /* - * Kernel image size is limited to 512 MB (see level2_kernel_pgt in - * arch/x86/kernel/head_64.S), and it is mapped here: + * Kernel image size is limited to 1GiB due to the fixmap living in the + * next 1GiB (see level2_kernel_pgt in arch/x86/kernel/head_64.S). Use + * 512MiB by default, leaving 1.5GiB for modules once the page tables + * are fully set up. If kernel ASLR is configured, it can extend the + * kernel page table mapping, reducing the size of the modules area. */ -#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024) +#define KERNEL_IMAGE_SIZE_DEFAULT (512 * 1024 * 1024) +#if defined(CONFIG_RANDOMIZE_BASE) && \ + CONFIG_RANDOMIZE_BASE_MAX_OFFSET > KERNEL_IMAGE_SIZE_DEFAULT +#define KERNEL_IMAGE_SIZE CONFIG_RANDOMIZE_BASE_MAX_OFFSET +#else +#define KERNEL_IMAGE_SIZE KERNEL_IMAGE_SIZE_DEFAULT +#endif #endif /* _ASM_X86_PAGE_64_DEFS_H */ diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 2d883440cb9a..c883bf726398 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -58,7 +58,7 @@ typedef struct { pteval_t pte; } pte_t; #define VMALLOC_START _AC(0xffffc90000000000, UL) #define VMALLOC_END _AC(0xffffe8ffffffffff, UL) #define VMEMMAP_START _AC(0xffffea0000000000, UL) -#define MODULES_VADDR _AC(0xffffffffa0000000, UL) +#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) #define MODULES_END _AC(0xffffffffff000000, UL) #define MODULES_LEN (MODULES_END - MODULES_VADDR) diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 4287f1ffba7e..5bdc5430597c 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -806,6 +806,9 @@ void __init mem_init(void) BUILD_BUG_ON(VMALLOC_START >= VMALLOC_END); #undef high_memory #undef __FIXADDR_TOP +#ifdef CONFIG_RANDOMIZE_BASE + BUILD_BUG_ON(CONFIG_RANDOMIZE_BASE_MAX_OFFSET > KERNEL_IMAGE_SIZE); +#endif #ifdef CONFIG_HIGHMEM BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START); From 6e6a4932b0f569b1a5bb4fcbf5dde1b1a42f01bb Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Sun, 13 Oct 2013 04:08:56 -0700 Subject: [PATCH 008/981] x86, boot: Rename get_flags() and check_flags() to *_cpuflags() When a function is used in more than one file it may not be possible to immediately tell from context what the intended meaning is. As such, it is more important that the naming be self-evident. Thus, change get_flags() to get_cpuflags(). For consistency, change check_flags() to check_cpuflags() even though it is only used in cpucheck.c. Link: http://lkml.kernel.org/r/1381450698-28710-2-git-send-email-keescook@chromium.org Signed-off-by: H. Peter Anvin --- arch/x86/boot/compressed/cpuflags.c | 2 +- arch/x86/boot/cpucheck.c | 14 +++++++------- arch/x86/boot/cpuflags.c | 2 +- arch/x86/boot/cpuflags.h | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/x86/boot/compressed/cpuflags.c b/arch/x86/boot/compressed/cpuflags.c index 931cba6a4bb0..aa313466118b 100644 --- a/arch/x86/boot/compressed/cpuflags.c +++ b/arch/x86/boot/compressed/cpuflags.c @@ -4,7 +4,7 @@ bool has_cpuflag(int flag) { - get_flags(); + get_cpuflags(); return test_bit(flag, cpu.flags); } diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c index e1f3c166a512..100a9a10076a 100644 --- a/arch/x86/boot/cpucheck.c +++ b/arch/x86/boot/cpucheck.c @@ -68,7 +68,7 @@ static int is_transmeta(void) } /* Returns a bitmask of which words we have error bits in */ -static int check_flags(void) +static int check_cpuflags(void) { u32 err; int i; @@ -101,8 +101,8 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) if (has_eflag(X86_EFLAGS_AC)) cpu.level = 4; - get_flags(); - err = check_flags(); + get_cpuflags(); + err = check_cpuflags(); if (test_bit(X86_FEATURE_LM, cpu.flags)) cpu.level = 64; @@ -121,8 +121,8 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) eax &= ~(1 << 15); asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); - get_flags(); /* Make sure it really did something */ - err = check_flags(); + get_cpuflags(); /* Make sure it really did something */ + err = check_cpuflags(); } else if (err == 0x01 && !(err_flags[0] & ~(1 << X86_FEATURE_CX8)) && is_centaur() && cpu.model >= 6) { @@ -137,7 +137,7 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); set_bit(X86_FEATURE_CX8, cpu.flags); - err = check_flags(); + err = check_cpuflags(); } else if (err == 0x01 && is_transmeta()) { /* Transmeta might have masked feature bits in word 0 */ @@ -152,7 +152,7 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) : : "ecx", "ebx"); asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); - err = check_flags(); + err = check_cpuflags(); } if (err_flags_ptr) diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c index b02544a2bce0..a9fcb7cfb241 100644 --- a/arch/x86/boot/cpuflags.c +++ b/arch/x86/boot/cpuflags.c @@ -65,7 +65,7 @@ static inline void cpuid(u32 id, u32 *a, u32 *b, u32 *c, u32 *d) ); } -void get_flags(void) +void get_cpuflags(void) { u32 max_intel_level, max_amd_level; u32 tfms; diff --git a/arch/x86/boot/cpuflags.h b/arch/x86/boot/cpuflags.h index 9bb4e25f7317..ea97697e51e4 100644 --- a/arch/x86/boot/cpuflags.h +++ b/arch/x86/boot/cpuflags.h @@ -14,6 +14,6 @@ extern struct cpu_features cpu; extern u32 cpu_vendor[3]; int has_eflag(unsigned long mask); -void get_flags(void); +void get_cpuflags(void); #endif From aec58bafaf89279522c44ec8ca9211eabb2b6976 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 15 Oct 2013 23:43:14 -0700 Subject: [PATCH 009/981] x86/relocs: Add percpu fixup for GNU ld 2.23 The GNU linker tries to put __per_cpu_load into the percpu area, resulting in a lack of its relocation. Force this symbol to be relocated. Seen starting with GNU ld 2.23 and later. Reported-by: Ingo Molnar Signed-off-by: Kees Cook Cc: Michael Davidson Cc: Cong Ding Link: http://lkml.kernel.org/r/20131016064314.GA2739@www.outflux.net Signed-off-by: Ingo Molnar --- arch/x86/tools/relocs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index 71a2533c90d3..11f9285a2ff6 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -729,6 +729,7 @@ static void percpu_init(void) * * The GNU linker incorrectly associates: * __init_begin + * __per_cpu_load * * The "gold" linker incorrectly associates: * init_per_cpu__irq_stack_union @@ -738,6 +739,7 @@ static int is_percpu_sym(ElfW(Sym) *sym, const char *symname) { return (sym->st_shndx == per_cpu_shndx) && strcmp(symname, "__init_begin") && + strcmp(symname, "__per_cpu_load") && strncmp(symname, "init_per_cpu_", 13); } From f4fccac05f7f6bacb8e481a84d175e85ffcf9fe2 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 31 Oct 2013 17:24:59 +0100 Subject: [PATCH 010/981] x86/efi: Simplify EFI_DEBUG ... and lose one #ifdef .. #endif sandwich. Signed-off-by: Borislav Petkov Signed-off-by: Matt Fleming --- arch/x86/platform/efi/efi.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 1d3372ac9c66..f396163b0402 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -51,7 +51,7 @@ #include #include -#define EFI_DEBUG 1 +#define EFI_DEBUG #define EFI_MIN_RESERVE 5120 @@ -398,9 +398,9 @@ int __init efi_memblock_x86_reserve_range(void) return 0; } -#if EFI_DEBUG static void __init print_efi_memmap(void) { +#ifdef EFI_DEBUG efi_memory_desc_t *md; void *p; int i; @@ -415,8 +415,8 @@ static void __init print_efi_memmap(void) md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), (md->num_pages >> (20 - EFI_PAGE_SHIFT))); } -} #endif /* EFI_DEBUG */ +} void __init efi_reserve_boot_services(void) { @@ -696,10 +696,7 @@ void __init efi_init(void) x86_platform.set_wallclock = efi_set_rtc_mmss; } #endif - -#if EFI_DEBUG print_efi_memmap(); -#endif } void __init efi_late_init(void) From 0fd64c23fdf556e9e68580cff03b3505797bbf53 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 31 Oct 2013 17:25:00 +0100 Subject: [PATCH 011/981] x86/mm/pageattr: Lookup address in an arbitrary PGD This is preparatory work in order to be able to map pages into a specified PGD and not implicitly and only into init_mm. Signed-off-by: Borislav Petkov Signed-off-by: Matt Fleming --- arch/x86/mm/pageattr.c | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index bb32480c2d71..c53de62a1170 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -30,6 +30,7 @@ */ struct cpa_data { unsigned long *vaddr; + pgd_t *pgd; pgprot_t mask_set; pgprot_t mask_clr; int numpages; @@ -322,17 +323,9 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, return prot; } -/* - * Lookup the page table entry for a virtual address. Return a pointer - * to the entry and the level of the mapping. - * - * Note: We return pud and pmd either when the entry is marked large - * or when the present bit is not set. Otherwise we would return a - * pointer to a nonexisting mapping. - */ -pte_t *lookup_address(unsigned long address, unsigned int *level) +static pte_t *__lookup_address_in_pgd(pgd_t *pgd, unsigned long address, + unsigned int *level) { - pgd_t *pgd = pgd_offset_k(address); pud_t *pud; pmd_t *pmd; @@ -361,8 +354,31 @@ pte_t *lookup_address(unsigned long address, unsigned int *level) return pte_offset_kernel(pmd, address); } + +/* + * Lookup the page table entry for a virtual address. Return a pointer + * to the entry and the level of the mapping. + * + * Note: We return pud and pmd either when the entry is marked large + * or when the present bit is not set. Otherwise we would return a + * pointer to a nonexisting mapping. + */ +pte_t *lookup_address(unsigned long address, unsigned int *level) +{ + return __lookup_address_in_pgd(pgd_offset_k(address), address, level); +} EXPORT_SYMBOL_GPL(lookup_address); +static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address, + unsigned int *level) +{ + if (cpa->pgd) + return __lookup_address_in_pgd(cpa->pgd + pgd_index(address), + address, level); + + return lookup_address(address, level); +} + /* * This is necessary because __pa() does not work on some * kinds of memory, like vmalloc() or the alloc_remap() From f3f729661e8db476ac427a97de015307aebb7404 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 31 Oct 2013 17:25:01 +0100 Subject: [PATCH 012/981] x86/mm/pageattr: Add a PGD pagetable populating function This allocates, if necessary, and populates the corresponding PGD entry with a PUD page. The next population level is a dummy macro which will be removed by the next patch and it is added here to keep the patch small and easily reviewable but not break bisection, at the same time. Signed-off-by: Borislav Petkov Signed-off-by: Matt Fleming --- arch/x86/mm/pageattr.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index c53de62a1170..4b47ae0602e1 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -666,6 +666,45 @@ static int split_large_page(pte_t *kpte, unsigned long address) return 0; } +#define populate_pud(cpa, addr, pgd, pgprot) (-1) + +/* + * Restrictions for kernel page table do not necessarily apply when mapping in + * an alternate PGD. + */ +static int populate_pgd(struct cpa_data *cpa, unsigned long addr) +{ + pgprot_t pgprot = __pgprot(_KERNPG_TABLE); + bool allocd_pgd = false; + pgd_t *pgd_entry; + pud_t *pud = NULL; /* shut up gcc */ + int ret; + + pgd_entry = cpa->pgd + pgd_index(addr); + + /* + * Allocate a PUD page and hand it down for mapping. + */ + if (pgd_none(*pgd_entry)) { + pud = (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); + if (!pud) + return -1; + + set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE)); + allocd_pgd = true; + } + + pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr); + pgprot_val(pgprot) |= pgprot_val(cpa->mask_set); + + ret = populate_pud(cpa, addr, pgd_entry, pgprot); + if (ret < 0) + return ret; + + cpa->numpages = ret; + return 0; +} + static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr, int primary) { From 4b23538d88c87d9c693ad87c8c808e92a505a6e6 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 31 Oct 2013 17:25:02 +0100 Subject: [PATCH 013/981] x86/mm/pageattr: Add a PUD pagetable populating function Add the next level of the pagetable populating function, we handle chunks around a 1G boundary by mapping them with the lower level functions - otherwise we use 1G pages for the mappings, thus using as less amount of pagetable pages as possible. Signed-off-by: Borislav Petkov Signed-off-by: Matt Fleming --- arch/x86/mm/pageattr.c | 87 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 86 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 4b47ae0602e1..81deca77b871 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -666,7 +666,92 @@ static int split_large_page(pte_t *kpte, unsigned long address) return 0; } -#define populate_pud(cpa, addr, pgd, pgprot) (-1) +static int alloc_pmd_page(pud_t *pud) +{ + pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); + if (!pmd) + return -1; + + set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); + return 0; +} + +#define populate_pmd(cpa, start, end, pages, pud, pgprot) (-1) + +static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, + pgprot_t pgprot) +{ + pud_t *pud; + unsigned long end; + int cur_pages = 0; + + end = start + (cpa->numpages << PAGE_SHIFT); + + /* + * Not on a Gb page boundary? => map everything up to it with + * smaller pages. + */ + if (start & (PUD_SIZE - 1)) { + unsigned long pre_end; + unsigned long next_page = (start + PUD_SIZE) & PUD_MASK; + + pre_end = min_t(unsigned long, end, next_page); + cur_pages = (pre_end - start) >> PAGE_SHIFT; + cur_pages = min_t(int, (int)cpa->numpages, cur_pages); + + pud = pud_offset(pgd, start); + + /* + * Need a PMD page? + */ + if (pud_none(*pud)) + if (alloc_pmd_page(pud)) + return -1; + + cur_pages = populate_pmd(cpa, start, pre_end, cur_pages, + pud, pgprot); + if (cur_pages < 0) + return cur_pages; + + start = pre_end; + } + + /* We mapped them all? */ + if (cpa->numpages == cur_pages) + return cur_pages; + + pud = pud_offset(pgd, start); + + /* + * Map everything starting from the Gb boundary, possibly with 1G pages + */ + while (end - start >= PUD_SIZE) { + set_pud(pud, __pud(cpa->pfn | _PAGE_PSE | massage_pgprot(pgprot))); + + start += PUD_SIZE; + cpa->pfn += PUD_SIZE; + cur_pages += PUD_SIZE >> PAGE_SHIFT; + pud++; + } + + /* Map trailing leftover */ + if (start < end) { + int tmp; + + pud = pud_offset(pgd, start); + if (pud_none(*pud)) + if (alloc_pmd_page(pud)) + return -1; + + tmp = populate_pmd(cpa, start, end, cpa->numpages - cur_pages, + pud, pgprot); + if (tmp < 0) + return cur_pages; + + cur_pages += tmp; + } + return cur_pages; +} /* * Restrictions for kernel page table do not necessarily apply when mapping in From f900a4b8ab0f462d89a9fcb6173cac1403415b16 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 31 Oct 2013 17:25:03 +0100 Subject: [PATCH 014/981] x86/mm/pageattr: Add a PMD pagetable populating function Handle PMD-level mappings the same as PUD ones. Signed-off-by: Borislav Petkov Signed-off-by: Matt Fleming --- arch/x86/mm/pageattr.c | 82 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 81 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 81deca77b871..968398b023c0 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -666,6 +666,16 @@ static int split_large_page(pte_t *kpte, unsigned long address) return 0; } +static int alloc_pte_page(pmd_t *pmd) +{ + pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); + if (!pte) + return -1; + + set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE)); + return 0; +} + static int alloc_pmd_page(pud_t *pud) { pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); @@ -676,7 +686,77 @@ static int alloc_pmd_page(pud_t *pud) return 0; } -#define populate_pmd(cpa, start, end, pages, pud, pgprot) (-1) +#define populate_pte(cpa, start, end, pages, pmd, pgprot) do {} while (0) + +static int populate_pmd(struct cpa_data *cpa, + unsigned long start, unsigned long end, + unsigned num_pages, pud_t *pud, pgprot_t pgprot) +{ + unsigned int cur_pages = 0; + pmd_t *pmd; + + /* + * Not on a 2M boundary? + */ + if (start & (PMD_SIZE - 1)) { + unsigned long pre_end = start + (num_pages << PAGE_SHIFT); + unsigned long next_page = (start + PMD_SIZE) & PMD_MASK; + + pre_end = min_t(unsigned long, pre_end, next_page); + cur_pages = (pre_end - start) >> PAGE_SHIFT; + cur_pages = min_t(unsigned int, num_pages, cur_pages); + + /* + * Need a PTE page? + */ + pmd = pmd_offset(pud, start); + if (pmd_none(*pmd)) + if (alloc_pte_page(pmd)) + return -1; + + populate_pte(cpa, start, pre_end, cur_pages, pmd, pgprot); + + start = pre_end; + } + + /* + * We mapped them all? + */ + if (num_pages == cur_pages) + return cur_pages; + + while (end - start >= PMD_SIZE) { + + /* + * We cannot use a 1G page so allocate a PMD page if needed. + */ + if (pud_none(*pud)) + if (alloc_pmd_page(pud)) + return -1; + + pmd = pmd_offset(pud, start); + + set_pmd(pmd, __pmd(cpa->pfn | _PAGE_PSE | massage_pgprot(pgprot))); + + start += PMD_SIZE; + cpa->pfn += PMD_SIZE; + cur_pages += PMD_SIZE >> PAGE_SHIFT; + } + + /* + * Map trailing 4K pages. + */ + if (start < end) { + pmd = pmd_offset(pud, start); + if (pmd_none(*pmd)) + if (alloc_pte_page(pmd)) + return -1; + + populate_pte(cpa, start, end, num_pages - cur_pages, + pmd, pgprot); + } + return num_pages; +} static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, pgprot_t pgprot) From c6b6f363f7b24aa448994e3a65c4d5b3116acfcc Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 31 Oct 2013 17:25:04 +0100 Subject: [PATCH 015/981] x86/mm/pageattr: Add a PTE pagetable populating function Handle last level by unconditionally writing the PTEs into the PTE page while paying attention to the NX bit. Signed-off-by: Borislav Petkov Signed-off-by: Matt Fleming --- arch/x86/mm/pageattr.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 968398b023c0..2a1308a8c072 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -686,7 +686,27 @@ static int alloc_pmd_page(pud_t *pud) return 0; } -#define populate_pte(cpa, start, end, pages, pmd, pgprot) do {} while (0) +static void populate_pte(struct cpa_data *cpa, + unsigned long start, unsigned long end, + unsigned num_pages, pmd_t *pmd, pgprot_t pgprot) +{ + pte_t *pte; + + pte = pte_offset_kernel(pmd, start); + + while (num_pages-- && start < end) { + + /* deal with the NX bit */ + if (!(pgprot_val(pgprot) & _PAGE_NX)) + cpa->pfn &= ~_PAGE_NX; + + set_pte(pte, pfn_pte(cpa->pfn >> PAGE_SHIFT, pgprot)); + + start += PAGE_SIZE; + cpa->pfn += PAGE_SIZE; + pte++; + } +} static int populate_pmd(struct cpa_data *cpa, unsigned long start, unsigned long end, From 0bb8aeee7b73b21e09d3ea12f2120d974f70b669 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 31 Oct 2013 17:25:05 +0100 Subject: [PATCH 016/981] x86/mm/pageattr: Add a PUD error unwinding path In case we encounter an error during the mapping of a region, we want to unwind what we've established so far exactly the way we did the mapping. This is the PUD part kept deliberately small for easier review. Signed-off-by: Borislav Petkov Signed-off-by: Matt Fleming --- arch/x86/mm/pageattr.c | 60 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 58 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 2a1308a8c072..1cbdbbc35b47 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -666,6 +666,51 @@ static int split_large_page(pte_t *kpte, unsigned long address) return 0; } +#define unmap_pmd_range(pud, start, pre_end) do {} while (0) + +static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) +{ + pud_t *pud = pud_offset(pgd, start); + + /* + * Not on a GB page boundary? + */ + if (start & (PUD_SIZE - 1)) { + unsigned long next_page = (start + PUD_SIZE) & PUD_MASK; + unsigned long pre_end = min_t(unsigned long, end, next_page); + + unmap_pmd_range(pud, start, pre_end); + + start = pre_end; + pud++; + } + + /* + * Try to unmap in 1G chunks? + */ + while (end - start >= PUD_SIZE) { + + if (pud_large(*pud)) + pud_clear(pud); + else + unmap_pmd_range(pud, start, start + PUD_SIZE); + + start += PUD_SIZE; + pud++; + } + + /* + * 2M leftovers? + */ + if (start < end) + unmap_pmd_range(pud, start, end); + + /* + * No need to try to free the PUD page because we'll free it in + * populate_pgd's error path + */ +} + static int alloc_pte_page(pmd_t *pmd) { pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); @@ -883,9 +928,20 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr) pgprot_val(pgprot) |= pgprot_val(cpa->mask_set); ret = populate_pud(cpa, addr, pgd_entry, pgprot); - if (ret < 0) - return ret; + if (ret < 0) { + unmap_pud_range(pgd_entry, addr, + addr + (cpa->numpages << PAGE_SHIFT)); + if (allocd_pgd) { + /* + * If I allocated this PUD page, I can just as well + * free it in this error path. + */ + pgd_clear(pgd_entry); + free_page((unsigned long)pud); + } + return ret; + } cpa->numpages = ret; return 0; } From 52a628fb454d13d944bb3c8a89a314cc3affa417 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 31 Oct 2013 17:25:06 +0100 Subject: [PATCH 017/981] x86/mm/pageattr: Add last levels of error path We try to free the pagetable pages once we've unmapped our portion. Signed-off-by: Borislav Petkov Signed-off-by: Matt Fleming --- arch/x86/mm/pageattr.c | 94 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 93 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 1cbdbbc35b47..db8ace29514f 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -666,7 +666,99 @@ static int split_large_page(pte_t *kpte, unsigned long address) return 0; } -#define unmap_pmd_range(pud, start, pre_end) do {} while (0) +static bool try_to_free_pte_page(pte_t *pte) +{ + int i; + + for (i = 0; i < PTRS_PER_PTE; i++) + if (!pte_none(pte[i])) + return false; + + free_page((unsigned long)pte); + return true; +} + +static bool try_to_free_pmd_page(pmd_t *pmd) +{ + int i; + + for (i = 0; i < PTRS_PER_PMD; i++) + if (!pmd_none(pmd[i])) + return false; + + free_page((unsigned long)pmd); + return true; +} + +static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end) +{ + pte_t *pte = pte_offset_kernel(pmd, start); + + while (start < end) { + set_pte(pte, __pte(0)); + + start += PAGE_SIZE; + pte++; + } + + if (try_to_free_pte_page((pte_t *)pmd_page_vaddr(*pmd))) { + pmd_clear(pmd); + return true; + } + return false; +} + +static void __unmap_pmd_range(pud_t *pud, pmd_t *pmd, + unsigned long start, unsigned long end) +{ + if (unmap_pte_range(pmd, start, end)) + if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud))) + pud_clear(pud); +} + +static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end) +{ + pmd_t *pmd = pmd_offset(pud, start); + + /* + * Not on a 2MB page boundary? + */ + if (start & (PMD_SIZE - 1)) { + unsigned long next_page = (start + PMD_SIZE) & PMD_MASK; + unsigned long pre_end = min_t(unsigned long, end, next_page); + + __unmap_pmd_range(pud, pmd, start, pre_end); + + start = pre_end; + pmd++; + } + + /* + * Try to unmap in 2M chunks. + */ + while (end - start >= PMD_SIZE) { + if (pmd_large(*pmd)) + pmd_clear(pmd); + else + __unmap_pmd_range(pud, pmd, start, start + PMD_SIZE); + + start += PMD_SIZE; + pmd++; + } + + /* + * 4K leftovers? + */ + if (start < end) + return __unmap_pmd_range(pud, pmd, start, end); + + /* + * Try again to free the PMD page if haven't succeeded above. + */ + if (!pud_none(*pud)) + if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud))) + pud_clear(pud); +} static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) { From 82f0712ca0f947170e785300b5c39d9c25e2f6ff Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 31 Oct 2013 17:25:07 +0100 Subject: [PATCH 018/981] x86/mm/cpa: Map in an arbitrary pgd Add the ability to map pages in an arbitrary pgd. This wires in the remaining stuff so that there's a new interface with which you can map a region into an arbitrary PGD. Signed-off-by: Borislav Petkov Signed-off-by: Matt Fleming --- arch/x86/mm/pageattr.c | 53 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 46 insertions(+), 7 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index db8ace29514f..b3b19f46c016 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -453,7 +453,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, * Check for races, another CPU might have split this page * up already: */ - tmp = lookup_address(address, &level); + tmp = _lookup_address_cpa(cpa, address, &level); if (tmp != kpte) goto out_unlock; @@ -559,7 +559,8 @@ out_unlock: } static int -__split_large_page(pte_t *kpte, unsigned long address, struct page *base) +__split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address, + struct page *base) { pte_t *pbase = (pte_t *)page_address(base); unsigned long pfn, pfninc = 1; @@ -572,7 +573,7 @@ __split_large_page(pte_t *kpte, unsigned long address, struct page *base) * Check for races, another CPU might have split this page * up for us already: */ - tmp = lookup_address(address, &level); + tmp = _lookup_address_cpa(cpa, address, &level); if (tmp != kpte) { spin_unlock(&pgd_lock); return 1; @@ -648,7 +649,8 @@ __split_large_page(pte_t *kpte, unsigned long address, struct page *base) return 0; } -static int split_large_page(pte_t *kpte, unsigned long address) +static int split_large_page(struct cpa_data *cpa, pte_t *kpte, + unsigned long address) { struct page *base; @@ -660,7 +662,7 @@ static int split_large_page(pte_t *kpte, unsigned long address) if (!base) return -ENOMEM; - if (__split_large_page(kpte, address, base)) + if (__split_large_page(cpa, kpte, address, base)) __free_page(base); return 0; @@ -1041,6 +1043,9 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr) static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr, int primary) { + if (cpa->pgd) + return populate_pgd(cpa, vaddr); + /* * Ignore all non primary paths. */ @@ -1085,7 +1090,7 @@ static int __change_page_attr(struct cpa_data *cpa, int primary) else address = *cpa->vaddr; repeat: - kpte = lookup_address(address, &level); + kpte = _lookup_address_cpa(cpa, address, &level); if (!kpte) return __cpa_process_fault(cpa, address, primary); @@ -1149,7 +1154,7 @@ repeat: /* * We have to split the large page: */ - err = split_large_page(kpte, address); + err = split_large_page(cpa, kpte, address); if (!err) { /* * Do a global flush tlb after splitting the large page @@ -1298,6 +1303,8 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, int ret, cache, checkalias; unsigned long baddr = 0; + memset(&cpa, 0, sizeof(cpa)); + /* * Check, if we are requested to change a not supported * feature: @@ -1744,6 +1751,7 @@ static int __set_pages_p(struct page *page, int numpages) { unsigned long tempaddr = (unsigned long) page_address(page); struct cpa_data cpa = { .vaddr = &tempaddr, + .pgd = NULL, .numpages = numpages, .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW), .mask_clr = __pgprot(0), @@ -1762,6 +1770,7 @@ static int __set_pages_np(struct page *page, int numpages) { unsigned long tempaddr = (unsigned long) page_address(page); struct cpa_data cpa = { .vaddr = &tempaddr, + .pgd = NULL, .numpages = numpages, .mask_set = __pgprot(0), .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW), @@ -1822,6 +1831,36 @@ bool kernel_page_present(struct page *page) #endif /* CONFIG_DEBUG_PAGEALLOC */ +int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, + unsigned numpages, unsigned long page_flags) +{ + int retval = -EINVAL; + + struct cpa_data cpa = { + .vaddr = &address, + .pfn = pfn, + .pgd = pgd, + .numpages = numpages, + .mask_set = __pgprot(0), + .mask_clr = __pgprot(0), + .flags = 0, + }; + + if (!(__supported_pte_mask & _PAGE_NX)) + goto out; + + if (!(page_flags & _PAGE_NX)) + cpa.mask_clr = __pgprot(_PAGE_NX); + + cpa.mask_set = __pgprot(_PAGE_PRESENT | page_flags); + + retval = __change_page_attr_set_clr(&cpa, 0); + __flush_tlb_all(); + +out: + return retval; +} + /* * The testcases use internal knowledge of the implementation that shouldn't * be exposed to the rest of the kernel. Include these directly here. From d2f7cbe7b26a74dbbbf8f325b2a6fd01bc34032c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 31 Oct 2013 17:25:08 +0100 Subject: [PATCH 019/981] x86/efi: Runtime services virtual mapping We map the EFI regions needed for runtime services non-contiguously, with preserved alignment on virtual addresses starting from -4G down for a total max space of 64G. This way, we provide for stable runtime services addresses across kernels so that a kexec'd kernel can still use them. Thus, they're mapped in a separate pagetable so that we don't pollute the kernel namespace. Add an efi= kernel command line parameter for passing miscellaneous options and chicken bits from the command line. While at it, add a chicken bit called "efi=old_map" which can be used as a fallback to the old runtime services mapping method in case there's some b0rkage with a particular EFI implementation (haha, it is hard to hold up the sarcasm here...). Also, add the UEFI RT VA space to Documentation/x86/x86_64/mm.txt. Signed-off-by: Borislav Petkov Signed-off-by: Matt Fleming --- Documentation/kernel-parameters.txt | 6 ++ Documentation/x86/x86_64/mm.txt | 7 ++ arch/x86/include/asm/efi.h | 64 +++++++++++----- arch/x86/include/asm/pgtable_types.h | 3 +- arch/x86/platform/efi/efi.c | 94 ++++++++++++++++------- arch/x86/platform/efi/efi_32.c | 9 ++- arch/x86/platform/efi/efi_64.c | 109 +++++++++++++++++++++++++++ arch/x86/platform/efi/efi_stub_64.S | 54 +++++++++++++ include/linux/efi.h | 1 + 9 files changed, 300 insertions(+), 47 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 7a0f202d482e..ed43e92b0e7e 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -835,6 +835,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. edd= [EDD] Format: {"off" | "on" | "skip[mbr]"} + efi= [EFI] + Format: { "old_map" } + old_map [X86-64]: switch to the old ioremap-based EFI + runtime services mapping. 32-bit still uses this one by + default. + efi_no_storage_paranoia [EFI; X86] Using this parameter you can use more than 50% of your efi variable storage. Use this parameter only if diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index 881582f75c9c..c584a51add15 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -28,4 +28,11 @@ reference. Current X86-64 implementations only support 40 bits of address space, but we support up to 46 bits. This expands into MBZ space in the page tables. +->trampoline_pgd: + +We map EFI runtime services in the aforementioned PGD in the virtual +range of 64Gb (arbitrarily set, can be raised if needed) + +0xffffffef00000000 - 0xffffffff00000000 + -Andi Kleen, Jul 2004 diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 65c6e6e3a552..89a05b0507b9 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -1,6 +1,24 @@ #ifndef _ASM_X86_EFI_H #define _ASM_X86_EFI_H +/* + * We map the EFI regions needed for runtime services non-contiguously, + * with preserved alignment on virtual addresses starting from -4G down + * for a total max space of 64G. This way, we provide for stable runtime + * services addresses across kernels so that a kexec'd kernel can still + * use them. + * + * This is the main reason why we're doing stable VA mappings for RT + * services. + * + * This flag is used in conjuction with a chicken bit called + * "efi=old_map" which can be used as a fallback to the old runtime + * services mapping method in case there's some b0rkage with a + * particular EFI implementation (haha, it is hard to hold up the + * sarcasm here...). + */ +#define EFI_OLD_MEMMAP EFI_ARCH_1 + #ifdef CONFIG_X86_32 #define EFI_LOADER_SIGNATURE "EL32" @@ -69,24 +87,31 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3, efi_call6((f), (u64)(a1), (u64)(a2), (u64)(a3), \ (u64)(a4), (u64)(a5), (u64)(a6)) +#define _efi_call_virtX(x, f, ...) \ +({ \ + efi_status_t __s; \ + \ + efi_sync_low_kernel_mappings(); \ + preempt_disable(); \ + __s = efi_call##x((void *)efi.systab->runtime->f, __VA_ARGS__); \ + preempt_enable(); \ + __s; \ +}) + #define efi_call_virt0(f) \ - efi_call0((efi.systab->runtime->f)) -#define efi_call_virt1(f, a1) \ - efi_call1((efi.systab->runtime->f), (u64)(a1)) -#define efi_call_virt2(f, a1, a2) \ - efi_call2((efi.systab->runtime->f), (u64)(a1), (u64)(a2)) -#define efi_call_virt3(f, a1, a2, a3) \ - efi_call3((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ - (u64)(a3)) -#define efi_call_virt4(f, a1, a2, a3, a4) \ - efi_call4((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ - (u64)(a3), (u64)(a4)) -#define efi_call_virt5(f, a1, a2, a3, a4, a5) \ - efi_call5((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ - (u64)(a3), (u64)(a4), (u64)(a5)) -#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \ - efi_call6((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ - (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6)) + _efi_call_virtX(0, f) +#define efi_call_virt1(f, a1) \ + _efi_call_virtX(1, f, (u64)(a1)) +#define efi_call_virt2(f, a1, a2) \ + _efi_call_virtX(2, f, (u64)(a1), (u64)(a2)) +#define efi_call_virt3(f, a1, a2, a3) \ + _efi_call_virtX(3, f, (u64)(a1), (u64)(a2), (u64)(a3)) +#define efi_call_virt4(f, a1, a2, a3, a4) \ + _efi_call_virtX(4, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4)) +#define efi_call_virt5(f, a1, a2, a3, a4, a5) \ + _efi_call_virtX(5, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4), (u64)(a5)) +#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \ + _efi_call_virtX(6, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6)) extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size, u32 type, u64 attribute); @@ -95,12 +120,17 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size, extern int add_efi_memmap; extern unsigned long x86_efi_facility; +extern struct efi_scratch efi_scratch; extern void efi_set_executable(efi_memory_desc_t *md, bool executable); extern int efi_memblock_x86_reserve_range(void); extern void efi_call_phys_prelog(void); extern void efi_call_phys_epilog(void); extern void efi_unmap_memmap(void); extern void efi_memory_uc(u64 addr, unsigned long size); +extern void __init efi_map_region(efi_memory_desc_t *md); +extern void efi_sync_low_kernel_mappings(void); +extern void efi_setup_page_tables(void); +extern void __init old_map_region(efi_memory_desc_t *md); #ifdef CONFIG_EFI diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index f4843e031131..028e28b6fc2c 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -379,7 +379,8 @@ static inline void update_page_count(int level, unsigned long pages) { } */ extern pte_t *lookup_address(unsigned long address, unsigned int *level); extern phys_addr_t slow_virt_to_phys(void *__address); - +extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, + unsigned numpages, unsigned long page_flags); #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_PGTABLE_DEFS_H */ diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index f396163b0402..b453069236fd 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -12,6 +12,8 @@ * Bibo Mao * Chandramouli Narayanan * Huang Ying + * Copyright (C) 2013 SuSE Labs + * Borislav Petkov - runtime services VA mapping * * Copied from efi_32.c to eliminate the duplicated code between EFI * 32/64 support code. --ying 2007-10-26 @@ -745,21 +747,56 @@ void efi_memory_uc(u64 addr, unsigned long size) set_memory_uc(addr, npages); } +void __init old_map_region(efi_memory_desc_t *md) +{ + u64 start_pfn, end_pfn, end; + unsigned long size; + void *va; + + start_pfn = PFN_DOWN(md->phys_addr); + size = md->num_pages << PAGE_SHIFT; + end = md->phys_addr + size; + end_pfn = PFN_UP(end); + + if (pfn_range_is_mapped(start_pfn, end_pfn)) { + va = __va(md->phys_addr); + + if (!(md->attribute & EFI_MEMORY_WB)) + efi_memory_uc((u64)(unsigned long)va, size); + } else + va = efi_ioremap(md->phys_addr, size, + md->type, md->attribute); + + md->virt_addr = (u64) (unsigned long) va; + if (!va) + pr_err("ioremap of 0x%llX failed!\n", + (unsigned long long)md->phys_addr); +} + /* * This function will switch the EFI runtime services to virtual mode. - * Essentially, look through the EFI memmap and map every region that - * has the runtime attribute bit set in its memory descriptor and update - * that memory descriptor with the virtual address obtained from ioremap(). - * This enables the runtime services to be called without having to + * Essentially, we look through the EFI memmap and map every region that + * has the runtime attribute bit set in its memory descriptor into the + * ->trampoline_pgd page table using a top-down VA allocation scheme. + * + * The old method which used to update that memory descriptor with the + * virtual address obtained from ioremap() is still supported when the + * kernel is booted with efi=old_map on its command line. Same old + * method enabled the runtime services to be called without having to * thunk back into physical mode for every invocation. + * + * The new method does a pagetable switch in a preemption-safe manner + * so that we're in a different address space when calling a runtime + * function. For function arguments passing we do copy the PGDs of the + * kernel page table into ->trampoline_pgd prior to each call. */ void __init efi_enter_virtual_mode(void) { efi_memory_desc_t *md, *prev_md = NULL; - efi_status_t status; + void *p, *new_memmap = NULL; unsigned long size; - u64 end, systab, start_pfn, end_pfn; - void *p, *va, *new_memmap = NULL; + efi_status_t status; + u64 end, systab; int count = 0; efi.systab = NULL; @@ -768,7 +805,6 @@ void __init efi_enter_virtual_mode(void) * We don't do virtual mode, since we don't do runtime services, on * non-native EFI */ - if (!efi_is_native()) { efi_unmap_memmap(); return; @@ -799,6 +835,7 @@ void __init efi_enter_virtual_mode(void) continue; } prev_md = md; + } for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { @@ -808,33 +845,18 @@ void __init efi_enter_virtual_mode(void) md->type != EFI_BOOT_SERVICES_DATA) continue; + efi_map_region(md); + size = md->num_pages << EFI_PAGE_SHIFT; end = md->phys_addr + size; - start_pfn = PFN_DOWN(md->phys_addr); - end_pfn = PFN_UP(end); - if (pfn_range_is_mapped(start_pfn, end_pfn)) { - va = __va(md->phys_addr); - - if (!(md->attribute & EFI_MEMORY_WB)) - efi_memory_uc((u64)(unsigned long)va, size); - } else - va = efi_ioremap(md->phys_addr, size, - md->type, md->attribute); - - md->virt_addr = (u64) (unsigned long) va; - - if (!va) { - pr_err("ioremap of 0x%llX failed!\n", - (unsigned long long)md->phys_addr); - continue; - } - systab = (u64) (unsigned long) efi_phys.systab; if (md->phys_addr <= systab && systab < end) { systab += md->virt_addr - md->phys_addr; + efi.systab = (efi_system_table_t *) (unsigned long) systab; } + new_memmap = krealloc(new_memmap, (count + 1) * memmap.desc_size, GFP_KERNEL); @@ -845,6 +867,9 @@ void __init efi_enter_virtual_mode(void) BUG_ON(!efi.systab); + efi_setup_page_tables(); + efi_sync_low_kernel_mappings(); + status = phys_efi_set_virtual_address_map( memmap.desc_size * count, memmap.desc_size, @@ -877,7 +902,8 @@ void __init efi_enter_virtual_mode(void) efi.query_variable_info = virt_efi_query_variable_info; efi.update_capsule = virt_efi_update_capsule; efi.query_capsule_caps = virt_efi_query_capsule_caps; - if (__supported_pte_mask & _PAGE_NX) + + if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX)) runtime_code_page_mkexec(); kfree(new_memmap); @@ -1007,3 +1033,15 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) return EFI_SUCCESS; } EXPORT_SYMBOL_GPL(efi_query_variable_store); + +static int __init parse_efi_cmdline(char *str) +{ + if (*str == '=') + str++; + + if (!strncmp(str, "old_map", 7)) + set_bit(EFI_OLD_MEMMAP, &x86_efi_facility); + + return 0; +} +early_param("efi", parse_efi_cmdline); diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index 40e446941dd7..e94557cf5487 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c @@ -37,9 +37,16 @@ * claim EFI runtime service handler exclusively and to duplicate a memory in * low memory space say 0 - 3G. */ - static unsigned long efi_rt_eflags; +void efi_sync_low_kernel_mappings(void) {} +void efi_setup_page_tables(void) {} + +void __init efi_map_region(efi_memory_desc_t *md) +{ + old_map_region(md); +} + void efi_call_phys_prelog(void) { struct desc_ptr gdt_descr; diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 39a0e7f1f0a3..bf286c386d33 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -38,10 +38,28 @@ #include #include #include +#include static pgd_t *save_pgd __initdata; static unsigned long efi_flags __initdata; +/* + * We allocate runtime services regions bottom-up, starting from -4G, i.e. + * 0xffff_ffff_0000_0000 and limit EFI VA mapping space to 64G. + */ +static u64 efi_va = -4 * (1UL << 30); +#define EFI_VA_END (-68 * (1UL << 30)) + +/* + * Scratch space used for switching the pagetable in the EFI stub + */ +struct efi_scratch { + u64 r15; + u64 prev_cr3; + pgd_t *efi_pgt; + bool use_pgd; +}; + static void __init early_code_mapping_set_exec(int executable) { efi_memory_desc_t *md; @@ -65,6 +83,9 @@ void __init efi_call_phys_prelog(void) int pgd; int n_pgds; + if (!efi_enabled(EFI_OLD_MEMMAP)) + return; + early_code_mapping_set_exec(1); local_irq_save(efi_flags); @@ -86,6 +107,10 @@ void __init efi_call_phys_epilog(void) */ int pgd; int n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); + + if (!efi_enabled(EFI_OLD_MEMMAP)) + return; + for (pgd = 0; pgd < n_pgds; pgd++) set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), save_pgd[pgd]); kfree(save_pgd); @@ -94,6 +119,90 @@ void __init efi_call_phys_epilog(void) early_code_mapping_set_exec(0); } +/* + * Add low kernel mappings for passing arguments to EFI functions. + */ +void efi_sync_low_kernel_mappings(void) +{ + unsigned num_pgds; + pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); + + if (efi_enabled(EFI_OLD_MEMMAP)) + return; + + num_pgds = pgd_index(MODULES_END - 1) - pgd_index(PAGE_OFFSET); + + memcpy(pgd + pgd_index(PAGE_OFFSET), + init_mm.pgd + pgd_index(PAGE_OFFSET), + sizeof(pgd_t) * num_pgds); +} + +void efi_setup_page_tables(void) +{ + efi_scratch.efi_pgt = (pgd_t *)(unsigned long)real_mode_header->trampoline_pgd; + + if (!efi_enabled(EFI_OLD_MEMMAP)) + efi_scratch.use_pgd = true; +} + +static void __init __map_region(efi_memory_desc_t *md, u64 va) +{ + pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); + unsigned long pf = 0, size; + u64 end; + + if (!(md->attribute & EFI_MEMORY_WB)) + pf |= _PAGE_PCD; + + size = md->num_pages << PAGE_SHIFT; + end = va + size; + + if (kernel_map_pages_in_pgd(pgd, md->phys_addr, va, md->num_pages, pf)) + pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n", + md->phys_addr, va); +} + +void __init efi_map_region(efi_memory_desc_t *md) +{ + unsigned long size = md->num_pages << PAGE_SHIFT; + u64 pa = md->phys_addr; + + if (efi_enabled(EFI_OLD_MEMMAP)) + return old_map_region(md); + + /* + * Make sure the 1:1 mappings are present as a catch-all for b0rked + * firmware which doesn't update all internal pointers after switching + * to virtual mode and would otherwise crap on us. + */ + __map_region(md, md->phys_addr); + + efi_va -= size; + + /* Is PA 2M-aligned? */ + if (!(pa & (PMD_SIZE - 1))) { + efi_va &= PMD_MASK; + } else { + u64 pa_offset = pa & (PMD_SIZE - 1); + u64 prev_va = efi_va; + + /* get us the same offset within this 2M page */ + efi_va = (efi_va & PMD_MASK) + pa_offset; + + if (efi_va > prev_va) + efi_va -= PMD_SIZE; + } + + if (efi_va < EFI_VA_END) { + pr_warn(FW_WARN "VA address range overflow!\n"); + return; + } + + /* Do the VA map */ + __map_region(md, efi_va); + md->virt_addr = efi_va; +} + void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, u32 type, u64 attribute) { diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index 4c07ccab8146..88073b140298 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S @@ -34,10 +34,47 @@ mov %rsi, %cr0; \ mov (%rsp), %rsp + /* stolen from gcc */ + .macro FLUSH_TLB_ALL + movq %r15, efi_scratch(%rip) + movq %r14, efi_scratch+8(%rip) + movq %cr4, %r15 + movq %r15, %r14 + andb $0x7f, %r14b + movq %r14, %cr4 + movq %r15, %cr4 + movq efi_scratch+8(%rip), %r14 + movq efi_scratch(%rip), %r15 + .endm + + .macro SWITCH_PGT + cmpb $0, efi_scratch+24(%rip) + je 1f + movq %r15, efi_scratch(%rip) # r15 + # save previous CR3 + movq %cr3, %r15 + movq %r15, efi_scratch+8(%rip) # prev_cr3 + movq efi_scratch+16(%rip), %r15 # EFI pgt + movq %r15, %cr3 + 1: + .endm + + .macro RESTORE_PGT + cmpb $0, efi_scratch+24(%rip) + je 2f + movq efi_scratch+8(%rip), %r15 + movq %r15, %cr3 + movq efi_scratch(%rip), %r15 + FLUSH_TLB_ALL + 2: + .endm + ENTRY(efi_call0) SAVE_XMM subq $32, %rsp + SWITCH_PGT call *%rdi + RESTORE_PGT addq $32, %rsp RESTORE_XMM ret @@ -47,7 +84,9 @@ ENTRY(efi_call1) SAVE_XMM subq $32, %rsp mov %rsi, %rcx + SWITCH_PGT call *%rdi + RESTORE_PGT addq $32, %rsp RESTORE_XMM ret @@ -57,7 +96,9 @@ ENTRY(efi_call2) SAVE_XMM subq $32, %rsp mov %rsi, %rcx + SWITCH_PGT call *%rdi + RESTORE_PGT addq $32, %rsp RESTORE_XMM ret @@ -68,7 +109,9 @@ ENTRY(efi_call3) subq $32, %rsp mov %rcx, %r8 mov %rsi, %rcx + SWITCH_PGT call *%rdi + RESTORE_PGT addq $32, %rsp RESTORE_XMM ret @@ -80,7 +123,9 @@ ENTRY(efi_call4) mov %r8, %r9 mov %rcx, %r8 mov %rsi, %rcx + SWITCH_PGT call *%rdi + RESTORE_PGT addq $32, %rsp RESTORE_XMM ret @@ -93,7 +138,9 @@ ENTRY(efi_call5) mov %r8, %r9 mov %rcx, %r8 mov %rsi, %rcx + SWITCH_PGT call *%rdi + RESTORE_PGT addq $48, %rsp RESTORE_XMM ret @@ -109,8 +156,15 @@ ENTRY(efi_call6) mov %r8, %r9 mov %rcx, %r8 mov %rsi, %rcx + SWITCH_PGT call *%rdi + RESTORE_PGT addq $48, %rsp RESTORE_XMM ret ENDPROC(efi_call6) + + .data +ENTRY(efi_scratch) + .fill 3,8,0 + .byte 0 diff --git a/include/linux/efi.h b/include/linux/efi.h index bc5687d0f315..6c0ca528300c 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -653,6 +653,7 @@ extern int __init efi_setup_pcdp_console(char *); #define EFI_RUNTIME_SERVICES 3 /* Can we use runtime services? */ #define EFI_MEMMAP 4 /* Can we use EFI memory map? */ #define EFI_64BIT 5 /* Is the firmware 64-bit? */ +#define EFI_ARCH_1 6 /* First arch-specific bit */ #ifdef CONFIG_EFI # ifdef CONFIG_X86 From ee41143027706d9f342dfe05487a00b20887fde7 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 31 Oct 2013 17:25:09 +0100 Subject: [PATCH 020/981] x86/efi: Check krealloc return value Check it just in case. We might just as well panic there because runtime won't be functioning anyway. Signed-off-by: Borislav Petkov Signed-off-by: Matt Fleming --- arch/x86/platform/efi/efi.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index b453069236fd..3fac4dee492f 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -860,6 +860,9 @@ void __init efi_enter_virtual_mode(void) new_memmap = krealloc(new_memmap, (count + 1) * memmap.desc_size, GFP_KERNEL); + if (!new_memmap) + goto err_out; + memcpy(new_memmap + (count * memmap.desc_size), md, memmap.desc_size); count++; @@ -914,6 +917,11 @@ void __init efi_enter_virtual_mode(void) EFI_VARIABLE_BOOTSERVICE_ACCESS | EFI_VARIABLE_RUNTIME_ACCESS, 0, NULL); + + return; + + err_out: + pr_err("Error reallocating memory, EFI runtime non-functional!\n"); } /* From a653f3563c51c7bb7de63d607bef09d3baddaeb8 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 11 Nov 2013 14:28:39 -0800 Subject: [PATCH 021/981] x86, kaslr: Mix entropy sources together as needed Depending on availability, mix the RDRAND and RDTSC entropy together with XOR. Only when neither is available should the i8254 be used. Update the Kconfig documentation to reflect this. Additionally, since bits used for entropy is masked elsewhere, drop the needless masking in the get_random_long(). Similarly, use the entire TSC, not just the low 32 bits. Finally, to improve the starting entropy, do a simple hashing of a build-time versions string and the boot-time boot_params structure for some additional level of unpredictability. Signed-off-by: Kees Cook Link: http://lkml.kernel.org/r/20131111222839.GA28616@www.outflux.net Signed-off-by: H. Peter Anvin --- arch/x86/Kconfig | 14 +++--- arch/x86/boot/compressed/aslr.c | 75 +++++++++++++++++++++++++-------- 2 files changed, 66 insertions(+), 23 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 51f439953d23..596cd9edeb9c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1735,13 +1735,17 @@ config RANDOMIZE_BASE deters exploit attempts relying on knowledge of the location of kernel internals. - Entropy is generated using the RDRAND instruction if it - is supported. If not, then RDTSC is used, if supported. If - neither RDRAND nor RDTSC are supported, then no randomness - is introduced. + Entropy is generated using the RDRAND instruction if it is + supported. If RDTSC is supported, it is used as well. If + neither RDRAND nor RDTSC are supported, then randomness is + read from the i8254 timer. The kernel will be offset by up to RANDOMIZE_BASE_MAX_OFFSET, - and aligned according to PHYSICAL_ALIGN. + and aligned according to PHYSICAL_ALIGN. Since the kernel is + built using 2GiB addressing, and PHYSICAL_ALGIN must be at a + minimum of 2MiB, only 10 bits of entropy is theoretically + possible. At best, due to page table layouts, 64-bit can use + 9 bits of entropy and 32-bit uses 8 bits. config RANDOMIZE_BASE_MAX_OFFSET hex "Maximum ASLR offset allowed" diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c index 05957986d123..8746487fa916 100644 --- a/arch/x86/boot/compressed/aslr.c +++ b/arch/x86/boot/compressed/aslr.c @@ -5,6 +5,17 @@ #include #include +#include +#include +#include +#include +#include +#include + +/* Simplified build-specific string for starting entropy. */ +static const char *build_str = UTS_RELEASE " (" LINUX_COMPILE_BY "@" + LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION; + #define I8254_PORT_CONTROL 0x43 #define I8254_PORT_COUNTER0 0x40 #define I8254_CMD_READBACK 0xC0 @@ -25,34 +36,62 @@ static inline u16 i8254(void) return timer; } +static unsigned long rotate_xor(unsigned long hash, const void *area, + size_t size) +{ + size_t i; + unsigned long *ptr = (unsigned long *)area; + + for (i = 0; i < size / sizeof(hash); i++) { + /* Rotate by odd number of bits and XOR. */ + hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7); + hash ^= ptr[i]; + } + + return hash; +} + +/* Attempt to create a simple but unpredictable starting entropy. */ +static unsigned long get_random_boot(void) +{ + unsigned long hash = 0; + + hash = rotate_xor(hash, build_str, sizeof(build_str)); + hash = rotate_xor(hash, real_mode, sizeof(*real_mode)); + + return hash; +} + static unsigned long get_random_long(void) { - unsigned long random; + unsigned long raw, random = get_random_boot(); + bool use_i8254 = true; + + debug_putstr("KASLR using"); if (has_cpuflag(X86_FEATURE_RDRAND)) { - debug_putstr("KASLR using RDRAND...\n"); - if (rdrand_long(&random)) - return random; + debug_putstr(" RDRAND"); + if (rdrand_long(&raw)) { + random ^= raw; + use_i8254 = false; + } } if (has_cpuflag(X86_FEATURE_TSC)) { - uint32_t raw; + debug_putstr(" RDTSC"); + rdtscll(raw); - debug_putstr("KASLR using RDTSC...\n"); - rdtscl(raw); - - /* Only use the low bits of rdtsc. */ - random = raw & 0xffff; - } else { - debug_putstr("KASLR using i8254...\n"); - random = i8254(); + random ^= raw; + use_i8254 = false; } - /* Extend timer bits poorly... */ - random |= (random << 16); -#ifdef CONFIG_X86_64 - random |= (random << 32); -#endif + if (use_i8254) { + debug_putstr(" i8254"); + random ^= i8254(); + } + + debug_putstr("...\n"); + return random; } From e8236c4d9338d52d0f2fcecc0b792ac0542e4ee9 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 11 Nov 2013 22:45:20 -0800 Subject: [PATCH 022/981] x86, kaslr: Add a circular multiply for better bit diffusion If we don't have RDRAND (in which case nothing else *should* matter), most sources have a highly biased entropy distribution. Use a circular multiply to diffuse the entropic bits. A circular multiply is a good operation for this: it is cheap on standard hardware and because it is symmetric (unlike an ordinary multiply) it doesn't introduce its own bias. Cc: Kees Cook Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/20131111222839.GA28616@www.outflux.net --- arch/x86/boot/compressed/aslr.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c index 8746487fa916..38a07cc4fbac 100644 --- a/arch/x86/boot/compressed/aslr.c +++ b/arch/x86/boot/compressed/aslr.c @@ -64,6 +64,11 @@ static unsigned long get_random_boot(void) static unsigned long get_random_long(void) { +#ifdef CONFIG_X86_64 + const unsigned long mix_const = 0x5d6008cbf3848dd3UL; +#else + const unsigned long mix_const = 0x3f39e593UL; +#endif unsigned long raw, random = get_random_boot(); bool use_i8254 = true; @@ -90,6 +95,12 @@ static unsigned long get_random_long(void) random ^= i8254(); } + /* Circular multiply for better bit diffusion */ + asm("mul %3" + : "=a" (random), "=d" (raw) + : "a" (random), "rm" (mix_const)); + random += raw; + debug_putstr("...\n"); return random; From 327f7d72454aecdc7a4a1c847a291a3f224b730f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 12 Nov 2013 08:56:07 -0800 Subject: [PATCH 023/981] x86, kaslr: Use char array to gain sizeof sanity The build_str needs to be char [] not char * for the sizeof() to report the string length. Reported-by: Mathias Krause Signed-off-by: Kees Cook Link: http://lkml.kernel.org/r/20131112165607.GA5921@www.outflux.net Signed-off-by: H. Peter Anvin --- arch/x86/boot/compressed/aslr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c index 38a07cc4fbac..84be1752dcd8 100644 --- a/arch/x86/boot/compressed/aslr.c +++ b/arch/x86/boot/compressed/aslr.c @@ -13,7 +13,7 @@ #include /* Simplified build-specific string for starting entropy. */ -static const char *build_str = UTS_RELEASE " (" LINUX_COMPILE_BY "@" +static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION; #define I8254_PORT_CONTROL 0x43 From 9dd1220114e00d8ec5cdc20085bbe198b21e1985 Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Mon, 11 Nov 2013 09:08:15 -0500 Subject: [PATCH 024/981] smp/cpumask: Make CONFIG_CPUMASK_OFFSTACK=y usable without debug dependency When CONFIG_CPUMASK_OFFSTACK was added in 2008, it was dependent upon CONFIG_DEBUG_PER_CPU_MAPS being enabled, or an architecture could select it. The debug dependency adds additional overhead that isn't required for operation of the feature and which is undesirable for distro kernels. CONFIG_CPUMASK_OFFSTACK=y is needed to increase the CONFIG_NR_CPUS value beyond 512 on x86. So drop the current dependency, its only real dependency is CONFIG_SMP=y. Signed-off-by: Josh Boyer Cc: Rusty Russell Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20131111140815.GB20328@hansolo.jdub.homelinux.org Signed-off-by: Ingo Molnar --- lib/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/Kconfig b/lib/Kconfig index b3c8be0da17f..50b47cde8b49 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -342,7 +342,8 @@ config CHECK_SIGNATURE bool config CPUMASK_OFFSTACK - bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS + bool "Force CPU masks off stack" + depends on SMP help Use dynamic allocation for cpumask_var_t, instead of putting them on the stack. This is a bit more expensive, but avoids From f4cb1cc18f364d761d5614eb6293cccc6647f259 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Sat, 16 Nov 2013 12:37:01 -0800 Subject: [PATCH 025/981] x86-64, copy_user: Remove zero byte check before copy user buffer. Operation of rep movsb instruction handles zero byte copy. As pointed out by Linus, there is no need to check zero size in kernel. Removing this redundant check saves a few cycles in copy user functions. Reported-by: Linus Torvalds Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1384634221-6006-1-git-send-email-fenghua.yu@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/lib/copy_user_64.S | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index a30ca15be21c..ffe4eb9f09eb 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -236,8 +236,6 @@ ENDPROC(copy_user_generic_unrolled) ENTRY(copy_user_generic_string) CFI_STARTPROC ASM_STAC - andl %edx,%edx - jz 4f cmpl $8,%edx jb 2f /* less than 8 bytes, go to byte copy loop */ ALIGN_DESTINATION @@ -249,7 +247,7 @@ ENTRY(copy_user_generic_string) 2: movl %edx,%ecx 3: rep movsb -4: xorl %eax,%eax + xorl %eax,%eax ASM_CLAC ret @@ -279,12 +277,10 @@ ENDPROC(copy_user_generic_string) ENTRY(copy_user_enhanced_fast_string) CFI_STARTPROC ASM_STAC - andl %edx,%edx - jz 2f movl %edx,%ecx 1: rep movsb -2: xorl %eax,%eax + xorl %eax,%eax ASM_CLAC ret From 5305ca10e7f44333e46c1ce57fb87306cb437891 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Nov 2013 14:14:00 -0800 Subject: [PATCH 026/981] x86/mm: Unify pte_to_pgoff() and pgoff_to_pte() helpers Use unified pte_bitop() helper to manipulate bits in pte/pgoff bitfields and convert pte_to_pgoff()/pgoff_to_pte() to inlines. Signed-off-by: Cyrill Gorcunov Signed-off-by: Andrew Morton Cc: Pavel Emelyanov Cc: Andy Lutomirski Cc: "H. Peter Anvin" Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable-2level.h | 96 ++++++++++++++++----------- 1 file changed, 57 insertions(+), 39 deletions(-) diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h index 3bf2dd0cf61f..0d193e234647 100644 --- a/arch/x86/include/asm/pgtable-2level.h +++ b/arch/x86/include/asm/pgtable-2level.h @@ -55,6 +55,13 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) #define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp) #endif +/* Bit manipulation helper on pte/pgoff entry */ +static inline unsigned long pte_bitop(unsigned long value, unsigned int rightshift, + unsigned long mask, unsigned int leftshift) +{ + return ((value >> rightshift) & mask) << leftshift; +} + #ifdef CONFIG_MEM_SOFT_DIRTY /* @@ -71,31 +78,34 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) #define PTE_FILE_BITS2 (PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1) #define PTE_FILE_BITS3 (PTE_FILE_SHIFT4 - PTE_FILE_SHIFT3 - 1) -#define pte_to_pgoff(pte) \ - ((((pte).pte_low >> (PTE_FILE_SHIFT1)) \ - & ((1U << PTE_FILE_BITS1) - 1))) \ - + ((((pte).pte_low >> (PTE_FILE_SHIFT2)) \ - & ((1U << PTE_FILE_BITS2) - 1)) \ - << (PTE_FILE_BITS1)) \ - + ((((pte).pte_low >> (PTE_FILE_SHIFT3)) \ - & ((1U << PTE_FILE_BITS3) - 1)) \ - << (PTE_FILE_BITS1 + PTE_FILE_BITS2)) \ - + ((((pte).pte_low >> (PTE_FILE_SHIFT4))) \ - << (PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3)) +#define PTE_FILE_MASK1 ((1U << PTE_FILE_BITS1) - 1) +#define PTE_FILE_MASK2 ((1U << PTE_FILE_BITS2) - 1) +#define PTE_FILE_MASK3 ((1U << PTE_FILE_BITS3) - 1) -#define pgoff_to_pte(off) \ - ((pte_t) { .pte_low = \ - ((((off)) & ((1U << PTE_FILE_BITS1) - 1)) << PTE_FILE_SHIFT1) \ - + ((((off) >> PTE_FILE_BITS1) \ - & ((1U << PTE_FILE_BITS2) - 1)) \ - << PTE_FILE_SHIFT2) \ - + ((((off) >> (PTE_FILE_BITS1 + PTE_FILE_BITS2)) \ - & ((1U << PTE_FILE_BITS3) - 1)) \ - << PTE_FILE_SHIFT3) \ - + ((((off) >> \ - (PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3))) \ - << PTE_FILE_SHIFT4) \ - + _PAGE_FILE }) +#define PTE_FILE_LSHIFT2 (PTE_FILE_BITS1) +#define PTE_FILE_LSHIFT3 (PTE_FILE_BITS1 + PTE_FILE_BITS2) +#define PTE_FILE_LSHIFT4 (PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3) + +static __always_inline pgoff_t pte_to_pgoff(pte_t pte) +{ + return (pgoff_t) + (pte_bitop(pte.pte_low, PTE_FILE_SHIFT1, PTE_FILE_MASK1, 0) + + pte_bitop(pte.pte_low, PTE_FILE_SHIFT2, PTE_FILE_MASK2, PTE_FILE_LSHIFT2) + + pte_bitop(pte.pte_low, PTE_FILE_SHIFT3, PTE_FILE_MASK3, PTE_FILE_LSHIFT3) + + pte_bitop(pte.pte_low, PTE_FILE_SHIFT4, -1UL, PTE_FILE_LSHIFT4)); +} + +static __always_inline pte_t pgoff_to_pte(pgoff_t off) +{ + return (pte_t){ + .pte_low = + pte_bitop(off, 0, PTE_FILE_MASK1, PTE_FILE_SHIFT1) + + pte_bitop(off, PTE_FILE_LSHIFT2, PTE_FILE_MASK2, PTE_FILE_SHIFT2) + + pte_bitop(off, PTE_FILE_LSHIFT3, PTE_FILE_MASK3, PTE_FILE_SHIFT3) + + pte_bitop(off, PTE_FILE_LSHIFT4, -1UL, PTE_FILE_SHIFT4) + + _PAGE_FILE, + }; +} #else /* CONFIG_MEM_SOFT_DIRTY */ @@ -115,22 +125,30 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) #define PTE_FILE_BITS1 (PTE_FILE_SHIFT2 - PTE_FILE_SHIFT1 - 1) #define PTE_FILE_BITS2 (PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1) -#define pte_to_pgoff(pte) \ - ((((pte).pte_low >> PTE_FILE_SHIFT1) \ - & ((1U << PTE_FILE_BITS1) - 1)) \ - + ((((pte).pte_low >> PTE_FILE_SHIFT2) \ - & ((1U << PTE_FILE_BITS2) - 1)) << PTE_FILE_BITS1) \ - + (((pte).pte_low >> PTE_FILE_SHIFT3) \ - << (PTE_FILE_BITS1 + PTE_FILE_BITS2))) +#define PTE_FILE_MASK1 ((1U << PTE_FILE_BITS1) - 1) +#define PTE_FILE_MASK2 ((1U << PTE_FILE_BITS2) - 1) -#define pgoff_to_pte(off) \ - ((pte_t) { .pte_low = \ - (((off) & ((1U << PTE_FILE_BITS1) - 1)) << PTE_FILE_SHIFT1) \ - + ((((off) >> PTE_FILE_BITS1) & ((1U << PTE_FILE_BITS2) - 1)) \ - << PTE_FILE_SHIFT2) \ - + (((off) >> (PTE_FILE_BITS1 + PTE_FILE_BITS2)) \ - << PTE_FILE_SHIFT3) \ - + _PAGE_FILE }) +#define PTE_FILE_LSHIFT2 (PTE_FILE_BITS1) +#define PTE_FILE_LSHIFT3 (PTE_FILE_BITS1 + PTE_FILE_BITS2) + +static __always_inline pgoff_t pte_to_pgoff(pte_t pte) +{ + return (pgoff_t) + (pte_bitop(pte.pte_low, PTE_FILE_SHIFT1, PTE_FILE_MASK1, 0) + + pte_bitop(pte.pte_low, PTE_FILE_SHIFT2, PTE_FILE_MASK2, PTE_FILE_LSHIFT2) + + pte_bitop(pte.pte_low, PTE_FILE_SHIFT3, -1UL, PTE_FILE_LSHIFT3)); +} + +static __always_inline pte_t pgoff_to_pte(pgoff_t off) +{ + return (pte_t){ + .pte_low = + pte_bitop(off, 0, PTE_FILE_MASK1, PTE_FILE_SHIFT1) + + pte_bitop(off, PTE_FILE_LSHIFT2, PTE_FILE_MASK2, PTE_FILE_SHIFT2) + + pte_bitop(off, PTE_FILE_LSHIFT3, -1UL, PTE_FILE_SHIFT3) + + _PAGE_FILE, + }; +} #endif /* CONFIG_MEM_SOFT_DIRTY */ From fd8526ad14c182605e42b64646344b95befd9f94 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 19 Nov 2013 15:17:50 +0200 Subject: [PATCH 027/981] x86/mm: Implement ASLR for hugetlb mappings Matthew noticed that hugetlb mappings don't participate in ASLR on x86-64: % for i in `seq 3`; do > tools/testing/selftests/vm/map_hugetlb | grep address > done Returned address is 0x2aaaaac00000 Returned address is 0x2aaaaac00000 Returned address is 0x2aaaaac00000 /proc/PID/maps entries for the mapping are always the same (except inode number): 2aaaaac00000-2aaabac00000 rw-p 00000000 00:0c 8200 /anon_hugepage (deleted) 2aaaaac00000-2aaabac00000 rw-p 00000000 00:0c 256 /anon_hugepage (deleted) 2aaaaac00000-2aaabac00000 rw-p 00000000 00:0c 7180 /anon_hugepage (deleted) The reason is the generic hugetlb_get_unmapped_area() function which is used on x86-64. It doesn't support randomization and use bottom-up unmapped area lookup, instead of usual top-down on x86-64. x86 has arch-specific hugetlb_get_unmapped_area(), but it's used only on x86-32. Let's use arch-specific hugetlb_get_unmapped_area() on x86-64 too. That adds ASLR and switches hugetlb mappings to use top-down unmapped area lookup: % for i in `seq 3`; do > tools/testing/selftests/vm/map_hugetlb | grep address > done Returned address is 0x7f4f08a00000 Returned address is 0x7fdda4200000 Returned address is 0x7febe0000000 /proc/PID/maps entries: 7f4f08a00000-7f4f18a00000 rw-p 00000000 00:0c 1168 /anon_hugepage (deleted) 7fdda4200000-7fddb4200000 rw-p 00000000 00:0c 7092 /anon_hugepage (deleted) 7febe0000000-7febf0000000 rw-p 00000000 00:0c 7183 /anon_hugepage (deleted) Unmapped area lookup policy for hugetlb mappings is consistent with normal mappings now -- the only difference is alignment requirements for huge pages. libhugetlbfs test-suite didn't detect any regressions with the patch applied (although it shows few failures on my machine regardless the patch). Signed-off-by: Kirill A. Shutemov Signed-off-by: Andrew Morton Cc: Matthew Wilcox Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Naoya Horiguchi Cc: Linus Torvalds Cc: Andrea Arcangeli Cc: Peter Zijlstra Cc: Mel Gorman Link: http://lkml.kernel.org/r/20131119131750.EA45CE0090@blue.fi.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/page.h | 1 + arch/x86/include/asm/page_32.h | 4 ---- arch/x86/mm/hugetlbpage.c | 9 +++------ 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index c87892442e53..775873d3be55 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -71,6 +71,7 @@ extern bool __virt_addr_valid(unsigned long kaddr); #include #define __HAVE_ARCH_GATE_AREA 1 +#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA #endif /* __KERNEL__ */ #endif /* _ASM_X86_PAGE_H */ diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h index 4d550d04b609..904f528cc8e8 100644 --- a/arch/x86/include/asm/page_32.h +++ b/arch/x86/include/asm/page_32.h @@ -5,10 +5,6 @@ #ifndef __ASSEMBLY__ -#ifdef CONFIG_HUGETLB_PAGE -#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA -#endif - #define __phys_addr_nodebug(x) ((x) - PAGE_OFFSET) #ifdef CONFIG_DEBUG_VIRTUAL extern unsigned long __phys_addr(unsigned long); diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 9d980d88b747..8c9f647ff9e1 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -87,9 +87,7 @@ int pmd_huge_support(void) } #endif -/* x86_64 also uses this file */ - -#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA +#ifdef CONFIG_HUGETLB_PAGE static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) @@ -99,7 +97,7 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, info.flags = 0; info.length = len; - info.low_limit = TASK_UNMAPPED_BASE; + info.low_limit = current->mm->mmap_legacy_base; info.high_limit = TASK_SIZE; info.align_mask = PAGE_MASK & ~huge_page_mask(h); info.align_offset = 0; @@ -172,8 +170,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, return hugetlb_get_unmapped_area_topdown(file, addr, len, pgoff, flags); } - -#endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/ +#endif /* CONFIG_HUGETLB_PAGE */ #ifdef CONFIG_X86_64 static __init int setup_hugepagesz(char *opt) From f1a83e652bedef88d6d77d3dc58250e08e7062bd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 19 Nov 2013 16:42:47 +0100 Subject: [PATCH 028/981] lockdep: Correctly annotate hardirq context in irq_exit() There was a reported deadlock on -rt which lockdep didn't report. It turns out that in irq_exit() we tell lockdep that the hardirq context ends and then do all kinds of locking afterwards. To fix it, move trace_hardirq_exit() to the very end of irq_exit(), this ensures all locking in tick_irq_exit() and rcu_irq_exit() are properly recorded as happening from hardirq context. This however leads to the 'fun' little problem of running softirqs while in hardirq context. To cure this make the softirq code a little more complex (in the CONFIG_TRACE_IRQFLAGS case). Due to stack swizzling arch dependent trickery we cannot pass an argument to __do_softirq() to tell it if it was done from hardirq context or not; so use a side-band argument. When we do __do_softirq() from hardirq context, 'atomically' flip to softirq context and back, so that no locking goes without being in either hard- or soft-irq context. I didn't find any new problems in mainline using this patch, but it did show the -rt problem. Reported-by: Sebastian Andrzej Siewior Cc: Frederic Weisbecker Cc: Linus Torvalds Cc: Andrew Morton Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-dgwc5cdksbn0jk09vbmcc9sa@git.kernel.org Signed-off-by: Ingo Molnar --- kernel/softirq.c | 54 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 45 insertions(+), 9 deletions(-) diff --git a/kernel/softirq.c b/kernel/softirq.c index b24988353458..eb0acf44b063 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -213,14 +213,52 @@ EXPORT_SYMBOL(local_bh_enable_ip); #define MAX_SOFTIRQ_TIME msecs_to_jiffies(2) #define MAX_SOFTIRQ_RESTART 10 +#ifdef CONFIG_TRACE_IRQFLAGS +/* + * Convoluted means of passing __do_softirq() a message through the various + * architecture execute_on_stack() bits. + * + * When we run softirqs from irq_exit() and thus on the hardirq stack we need + * to keep the lockdep irq context tracking as tight as possible in order to + * not miss-qualify lock contexts and miss possible deadlocks. + */ +static DEFINE_PER_CPU(int, softirq_from_hardirq); + +static inline void lockdep_softirq_from_hardirq(void) +{ + this_cpu_write(softirq_from_hardirq, 1); +} + +static inline void lockdep_softirq_start(void) +{ + if (this_cpu_read(softirq_from_hardirq)) + trace_hardirq_exit(); + lockdep_softirq_enter(); +} + +static inline void lockdep_softirq_end(void) +{ + lockdep_softirq_exit(); + if (this_cpu_read(softirq_from_hardirq)) { + this_cpu_write(softirq_from_hardirq, 0); + trace_hardirq_enter(); + } +} + +#else +static inline void lockdep_softirq_from_hardirq(void) { } +static inline void lockdep_softirq_start(void) { } +static inline void lockdep_softirq_end(void) { } +#endif + asmlinkage void __do_softirq(void) { - struct softirq_action *h; - __u32 pending; unsigned long end = jiffies + MAX_SOFTIRQ_TIME; - int cpu; unsigned long old_flags = current->flags; int max_restart = MAX_SOFTIRQ_RESTART; + struct softirq_action *h; + __u32 pending; + int cpu; /* * Mask out PF_MEMALLOC s current task context is borrowed for the @@ -233,7 +271,7 @@ asmlinkage void __do_softirq(void) account_irq_enter_time(current); __local_bh_disable(_RET_IP_, SOFTIRQ_OFFSET); - lockdep_softirq_enter(); + lockdep_softirq_start(); cpu = smp_processor_id(); restart: @@ -280,16 +318,13 @@ restart: wakeup_softirqd(); } - lockdep_softirq_exit(); - + lockdep_softirq_end(); account_irq_exit_time(current); __local_bh_enable(SOFTIRQ_OFFSET); WARN_ON_ONCE(in_interrupt()); tsk_restore_flags(current, old_flags, PF_MEMALLOC); } - - asmlinkage void do_softirq(void) { __u32 pending; @@ -332,6 +367,7 @@ void irq_enter(void) static inline void invoke_softirq(void) { if (!force_irqthreads) { + lockdep_softirq_from_hardirq(); #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK /* * We can safely execute softirq on the current stack if @@ -377,13 +413,13 @@ void irq_exit(void) #endif account_irq_exit_time(current); - trace_hardirq_exit(); preempt_count_sub(HARDIRQ_OFFSET); if (!in_interrupt() && local_softirq_pending()) invoke_softirq(); tick_irq_exit(); rcu_irq_exit(); + trace_hardirq_exit(); /* must be last! */ } /* From 3247343118daa73f2b94b7fa565425d1d9f9ac84 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 8 Nov 2013 18:52:21 +0100 Subject: [PATCH 029/981] uprobes: Add uprobe_task->dup_xol_work/dup_xol_addr uprobe_task->vaddr is a bit strange. The generic code uses it only to pass the additional argument to arch_uprobe_pre_xol(), and since it is always equal to instruction_pointer() this looks even more strange. And both utask->vaddr and and utask->autask have the same scope, they only have the meaning when the task executes the probed insn out-of-line, so it is safe to reuse both in UTASK_RUNNING state. This all means that logically ->vaddr belongs to arch_uprobe_task and we should probably move it there, arch_uprobe_pre_xol() can record instruction_pointer() itself. OTOH, it is also used by uprobe_copy_process() and dup_xol_work() for another purpose, this doesn't look clean and doesn't allow to move this member into arch_uprobe_task. This patch adds the union with 2 anonymous structs into uprobe_task. The first struct is autask + vaddr, this way we "almost" move vaddr into autask. The second struct has 2 new members for uprobe_copy_process() paths: ->dup_xol_addr which can be used instead ->vaddr, and ->dup_xol_work which can be used to avoid kmalloc() and simplify the code. Note that this union will likely have another member(s), we need something like "private_data_for_handlers" so that the tracing handlers could use it to communicate with call_fetch() methods. Signed-off-by: Oleg Nesterov Reviewed-by: Masami Hiramatsu Acked-by: Srikar Dronamraju --- include/linux/uprobes.h | 21 ++++++++++++++++----- kernel/events/uprobes.c | 16 ++++------------ 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 319eae70fe84..2225542624de 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -26,6 +26,7 @@ #include #include +#include struct vm_area_struct; struct mm_struct; @@ -72,14 +73,24 @@ enum uprobe_task_state { */ struct uprobe_task { enum uprobe_task_state state; - struct arch_uprobe_task autask; + + union { + struct { + struct arch_uprobe_task autask; + unsigned long vaddr; + }; + + struct { + struct callback_head dup_xol_work; + unsigned long dup_xol_addr; + }; + }; + + struct uprobe *active_uprobe; + unsigned long xol_vaddr; struct return_instance *return_instances; unsigned int depth; - struct uprobe *active_uprobe; - - unsigned long xol_vaddr; - unsigned long vaddr; }; /* diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 24b7d6ca871b..df4ef0971266 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1403,12 +1403,10 @@ static void uprobe_warn(struct task_struct *t, const char *msg) static void dup_xol_work(struct callback_head *work) { - kfree(work); - if (current->flags & PF_EXITING) return; - if (!__create_xol_area(current->utask->vaddr)) + if (!__create_xol_area(current->utask->dup_xol_addr)) uprobe_warn(current, "dup xol area"); } @@ -1419,7 +1417,6 @@ void uprobe_copy_process(struct task_struct *t, unsigned long flags) { struct uprobe_task *utask = current->utask; struct mm_struct *mm = current->mm; - struct callback_head *work; struct xol_area *area; t->utask = NULL; @@ -1441,14 +1438,9 @@ void uprobe_copy_process(struct task_struct *t, unsigned long flags) if (mm == t->mm) return; - /* TODO: move it into the union in uprobe_task */ - work = kmalloc(sizeof(*work), GFP_KERNEL); - if (!work) - return uprobe_warn(t, "dup xol area"); - - t->utask->vaddr = area->vaddr; - init_task_work(work, dup_xol_work); - task_work_add(t, work, true); + t->utask->dup_xol_addr = area->vaddr; + init_task_work(&t->utask->dup_xol_work, dup_xol_work); + task_work_add(t, &t->utask->dup_xol_work, true); } /* From 803200e24abf0f9ec18631290d26b2185477f3a6 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 9 Nov 2013 17:58:54 +0100 Subject: [PATCH 030/981] uprobes: Don't assume that arch_uprobe->insn/ixol is u8[MAX_UINSN_BYTES] arch_uprobe should be opaque as much as possible to the generic code, but currently it assumes that insn/ixol must be u8[] of the known size. Remove this unnecessary dependency, we can use "&" and and sizeof() with the same effect. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index df4ef0971266..445962a72498 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -330,7 +330,7 @@ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned int __weak set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr) { - return uprobe_write_opcode(mm, vaddr, *(uprobe_opcode_t *)auprobe->insn); + return uprobe_write_opcode(mm, vaddr, *(uprobe_opcode_t *)&auprobe->insn); } static int match_uprobe(struct uprobe *l, struct uprobe *r) @@ -529,8 +529,8 @@ static int copy_insn(struct uprobe *uprobe, struct file *filp) { struct address_space *mapping = uprobe->inode->i_mapping; loff_t offs = uprobe->offset; - void *insn = uprobe->arch.insn; - int size = MAX_UINSN_BYTES; + void *insn = &uprobe->arch.insn; + int size = sizeof(uprobe->arch.insn); int len, err = -EIO; /* Copy only available bytes, -EIO if nothing was read */ @@ -569,7 +569,7 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file, goto out; ret = -ENOTSUPP; - if (is_trap_insn((uprobe_opcode_t *)uprobe->arch.insn)) + if (is_trap_insn((uprobe_opcode_t *)&uprobe->arch.insn)) goto out; ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr); @@ -1264,7 +1264,7 @@ static unsigned long xol_get_insn_slot(struct uprobe *uprobe) /* Initialize the slot */ copy_to_page(area->page, xol_vaddr, - uprobe->arch.ixol, sizeof(uprobe->arch.ixol)); + &uprobe->arch.ixol, sizeof(uprobe->arch.ixol)); /* * We probably need flush_icache_user_range() but it needs vma. * This should work on supported architectures too. From 3d78e945b6249d4ef2308192343f8b203b1d7ea5 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 9 Nov 2013 18:44:19 +0100 Subject: [PATCH 031/981] uprobes/powerpc: Kill arch_uprobe->ainsn powerpc has both arch_uprobe->insn and arch_uprobe->ainsn to make the generic code happy. This is no longer needed after the previous change, powerpc can just use "u32 insn". Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Acked-by: Ananth N Mavinakayanahalli --- arch/powerpc/include/asm/uprobes.h | 5 ++--- arch/powerpc/kernel/uprobes.c | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h index 75c6ecdb8f37..7422a999a39a 100644 --- a/arch/powerpc/include/asm/uprobes.h +++ b/arch/powerpc/include/asm/uprobes.h @@ -36,9 +36,8 @@ typedef ppc_opcode_t uprobe_opcode_t; struct arch_uprobe { union { - u8 insn[MAX_UINSN_BYTES]; - u8 ixol[MAX_UINSN_BYTES]; - u32 ainsn; + u32 insn; + u32 ixol; }; }; diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c index 59f419b935f2..003b20964ea0 100644 --- a/arch/powerpc/kernel/uprobes.c +++ b/arch/powerpc/kernel/uprobes.c @@ -186,7 +186,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) * emulate_step() returns 1 if the insn was successfully emulated. * For all other cases, we need to single-step in hardware. */ - ret = emulate_step(regs, auprobe->ainsn); + ret = emulate_step(regs, auprobe->insn); if (ret > 0) return true; From c912dae60ae6f659455f239298110adc67a5f3e9 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 9 Nov 2013 19:49:39 +0100 Subject: [PATCH 032/981] uprobes: Cleanup !CONFIG_UPROBES decls, unexport xol_area 1. Don't include asm/uprobes.h unconditionally, we only need it if CONFIG_UPROBES. 2. Move the definition of "struct xol_area" into uprobes.c. Perhaps we should simply kill struct uprobes_state, it buys nothing. 3. Kill the dummy definition of uprobe_get_swbp_addr(), nobody except handle_swbp() needs it. 4. Purely cosmetic, but move the decl of uprobe_get_swbp_addr() up, close to other __weak helpers. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- include/linux/uprobes.h | 31 ++++--------------------------- kernel/events/uprobes.c | 19 +++++++++++++++++++ 2 files changed, 23 insertions(+), 27 deletions(-) diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 2225542624de..e32251e00e62 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -33,10 +33,6 @@ struct mm_struct; struct inode; struct notifier_block; -#ifdef CONFIG_ARCH_SUPPORTS_UPROBES -# include -#endif - #define UPROBE_HANDLER_REMOVE 1 #define UPROBE_HANDLER_MASK 1 @@ -61,6 +57,8 @@ struct uprobe_consumer { }; #ifdef CONFIG_UPROBES +#include + enum uprobe_task_state { UTASK_RUNNING, UTASK_SSTEP, @@ -93,24 +91,7 @@ struct uprobe_task { unsigned int depth; }; -/* - * On a breakpoint hit, thread contests for a slot. It frees the - * slot after singlestep. Currently a fixed number of slots are - * allocated. - */ -struct xol_area { - wait_queue_head_t wq; /* if all slots are busy */ - atomic_t slot_count; /* number of in-use slots */ - unsigned long *bitmap; /* 0 = free slot */ - struct page *page; - - /* - * We keep the vma's vm_start rather than a pointer to the vma - * itself. The probed process or a naughty kernel module could make - * the vma go away, and we must handle that reasonably gracefully. - */ - unsigned long vaddr; /* Page(s) of instruction slots */ -}; +struct xol_area; struct uprobes_state { struct xol_area *xol_area; @@ -120,6 +101,7 @@ extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsign extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); extern bool __weak is_swbp_insn(uprobe_opcode_t *insn); extern bool __weak is_trap_insn(uprobe_opcode_t *insn); +extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs); extern int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t); extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool); @@ -131,7 +113,6 @@ extern void uprobe_end_dup_mmap(void); extern void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm); extern void uprobe_free_utask(struct task_struct *t); extern void uprobe_copy_process(struct task_struct *t, unsigned long flags); -extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs); extern int uprobe_post_sstep_notifier(struct pt_regs *regs); extern int uprobe_pre_sstep_notifier(struct pt_regs *regs); extern void uprobe_notify_resume(struct pt_regs *regs); @@ -187,10 +168,6 @@ static inline bool uprobe_deny_signal(void) { return false; } -static inline unsigned long uprobe_get_swbp_addr(struct pt_regs *regs) -{ - return 0; -} static inline void uprobe_free_utask(struct task_struct *t) { } diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 445962a72498..51a7f535ff96 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -85,6 +85,25 @@ struct return_instance { struct return_instance *next; /* keep as stack */ }; +/* + * On a breakpoint hit, thread contests for a slot. It frees the + * slot after singlestep. Currently a fixed number of slots are + * allocated. + */ +struct xol_area { + wait_queue_head_t wq; /* if all slots are busy */ + atomic_t slot_count; /* number of in-use slots */ + unsigned long *bitmap; /* 0 = free slot */ + struct page *page; + + /* + * We keep the vma's vm_start rather than a pointer to the vma + * itself. The probed process or a naughty kernel module could make + * the vma go away, and we must handle that reasonably gracefully. + */ + unsigned long vaddr; /* Page(s) of instruction slots */ +}; + /* * valid_vma: Verify if the specified vma is an executable vma * Relax restrictions while unregistering: vm_flags might have From ad439356ae5ae7688b39f1107fd5b874850fec18 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 19 Nov 2013 17:20:21 +0100 Subject: [PATCH 033/981] uprobes: Document xol_area and arch_uprobe->insn/ixol Document xol_area and arch_uprobe. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 51a7f535ff96..b886a5e7d4ff 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -73,6 +73,17 @@ struct uprobe { struct inode *inode; /* Also hold a ref to inode */ loff_t offset; unsigned long flags; + + /* + * The generic code assumes that it has two members of unknown type + * owned by the arch-specific code: + * + * insn - copy_insn() saves the original instruction here for + * arch_uprobe_analyze_insn(). + * + * ixol - potentially modified instruction to execute out of + * line, copied to xol_area by xol_get_insn_slot(). + */ struct arch_uprobe arch; }; @@ -86,6 +97,10 @@ struct return_instance { }; /* + * Execute out of line area: anonymous executable mapping installed + * by the probed task to execute the copy of the original instruction + * mangled by set_swbp(). + * * On a breakpoint hit, thread contests for a slot. It frees the * slot after singlestep. Currently a fixed number of slots are * allocated. From 661c80192d21269c7fc566f1d547510b0c867677 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 20 Nov 2013 12:50:51 -0800 Subject: [PATCH 034/981] x86-64, copy_user: Use leal to produce 32-bit results When we are using lea to produce a 32-bit result, we can use the leal form, rather than using leaq and worry about truncation elsewhere. Make the leal explicit, both to be more obvious and since that is what gcc generates and thus is less likely to trigger obscure gas bugs. Cc: Fenghua Yu Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1384634221-6006-1-git-send-email-fenghua.yu@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/lib/copy_user_64.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index ffe4eb9f09eb..dee945d55594 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -186,7 +186,7 @@ ENTRY(copy_user_generic_unrolled) 30: shll $6,%ecx addl %ecx,%edx jmp 60f -40: lea (%rdx,%rcx,8),%rdx +40: leal (%rdx,%rcx,8),%edx jmp 60f 50: movl %ecx,%edx 60: jmp copy_user_handle_tail /* ecx is zerorest also */ @@ -252,7 +252,7 @@ ENTRY(copy_user_generic_string) ret .section .fixup,"ax" -11: lea (%rdx,%rcx,8),%rcx +11: leal (%rdx,%rcx,8),%ecx 12: movl %ecx,%edx /* ecx is zerorest also */ jmp copy_user_handle_tail .previous From e0edc78f25c020dea66742c05a7fbcb2ff3df629 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 22 Nov 2013 11:24:53 +0100 Subject: [PATCH 035/981] Documentation/memory-barriers.txt: Fix a typo in the data dependency description This typo has been there forever, it is 7.5 years old, looks like this section of our memory ordering documentation is a place where most eyes are glazed over already ;-) [ Also fix some stray spaces and stray tabs while at it, shrinking the file by 49 bytes. Visual output unchanged. ] Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Cc: Thomas Gleixner Cc: Paul E. McKenney Link: http://lkml.kernel.org/n/tip-gncea9cb8igosblizfqMXrie@git.kernel.org Signed-off-by: Ingo Molnar --- Documentation/memory-barriers.txt | 42 +++++++++++++++---------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index c8c42e64e953..020cccdbdd0c 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -500,7 +500,7 @@ odd-numbered bank is idle, one can see the new value of the pointer P (&B), but the old value of the variable B (2). -Another example of where data dependency barriers might by required is where a +Another example of where data dependency barriers might be required is where a number is read from memory and then used to calculate the index for an array access: @@ -882,12 +882,12 @@ cache it for later use. Consider: - CPU 1 CPU 2 + CPU 1 CPU 2 ======================= ======================= - LOAD B - DIVIDE } Divide instructions generally - DIVIDE } take a long time to perform - LOAD A + LOAD B + DIVIDE } Divide instructions generally + DIVIDE } take a long time to perform + LOAD A Which might appear as this: @@ -910,13 +910,13 @@ Which might appear as this: Placing a read barrier or a data dependency barrier just before the second load: - CPU 1 CPU 2 + CPU 1 CPU 2 ======================= ======================= - LOAD B - DIVIDE - DIVIDE + LOAD B + DIVIDE + DIVIDE - LOAD A + LOAD A will force any value speculatively obtained to be reconsidered to an extent dependent on the type of barrier used. If there was no change made to the @@ -1887,8 +1887,8 @@ functions: space should suffice for PCI. [*] NOTE! attempting to load from the same location as was written to may - cause a malfunction - consider the 16550 Rx/Tx serial registers for - example. + cause a malfunction - consider the 16550 Rx/Tx serial registers for + example. Used with prefetchable I/O memory, an mmiowb() barrier may be required to force stores to be ordered. @@ -1955,19 +1955,19 @@ barriers for the most part act at the interface between the CPU and its cache : +--------+ +--------+ : +--------+ +-----------+ | | | | : | | | | +--------+ - | CPU | | Memory | : | CPU | | | | | - | Core |--->| Access |----->| Cache |<-->| | | | + | CPU | | Memory | : | CPU | | | | | + | Core |--->| Access |----->| Cache |<-->| | | | | | | Queue | : | | | |--->| Memory | - | | | | : | | | | | | - +--------+ +--------+ : +--------+ | | | | + | | | | : | | | | | | + +--------+ +--------+ : +--------+ | | | | : | Cache | +--------+ : | Coherency | : | Mechanism | +--------+ +--------+ +--------+ : +--------+ | | | | | | | | : | | | | | | | CPU | | Memory | : | CPU | | |--->| Device | - | Core |--->| Access |----->| Cache |<-->| | | | - | | | Queue | : | | | | | | + | Core |--->| Access |----->| Cache |<-->| | | | + | | | Queue | : | | | | | | | | | | : | | | | +--------+ +--------+ +--------+ : +--------+ +-----------+ : @@ -2090,7 +2090,7 @@ CPU's caches by some other cache event: p = &v; q = p; - + x = *q; Reads from v before v updated in cache @@ -2115,7 +2115,7 @@ queue before processing any further requests: p = &v; q = p; - + smp_read_barrier_depends() From c92132f5980666c7a52ecb53d98226c9986d32cd Mon Sep 17 00:00:00 2001 From: Chunhe Lan Date: Mon, 25 Nov 2013 11:28:41 +0100 Subject: [PATCH 036/981] edac/85xx: Add PCIe error interrupt edac support Add pcie error interrupt edac support for mpc85xx, p3041, p4080, and p5020. The mpc85xx uses the legacy interrupt report mechanism - the error interrupts are reported directly to mpic. While the p3041/ p4080/p5020 attaches the most of error interrupts to interrupt zero. And report error interrupts to mpic via interrupt 0. This patch can handle both of them. Signed-off-by: Chunhe Lan Link: http://lkml.kernel.org/r/1384712714-8826-3-git-send-email-morbidrsa@gmail.com Cc: Doug Thompson Cc: Dave Jiang Signed-off-by: Johannes Thumshirn Signed-off-by: Borislav Petkov --- drivers/edac/mpc85xx_edac.c | 98 ++++++++++++++++++++++++++++++++----- drivers/edac/mpc85xx_edac.h | 7 +++ 2 files changed, 94 insertions(+), 11 deletions(-) diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c index fd46b0bd5f2a..8f9182179a7c 100644 --- a/drivers/edac/mpc85xx_edac.c +++ b/drivers/edac/mpc85xx_edac.c @@ -1,6 +1,8 @@ /* * Freescale MPC85xx Memory Controller kenel module * + * Parts Copyrighted (c) 2013 by Freescale Semiconductor, Inc. + * * Author: Dave Jiang * * 2006-2007 (c) MontaVista Software, Inc. This file is licensed under @@ -196,6 +198,42 @@ static void mpc85xx_pci_check(struct edac_pci_ctl_info *pci) edac_pci_handle_npe(pci, pci->ctl_name); } +static void mpc85xx_pcie_check(struct edac_pci_ctl_info *pci) +{ + struct mpc85xx_pci_pdata *pdata = pci->pvt_info; + u32 err_detect; + + err_detect = in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_DR); + + pr_err("PCIe error(s) detected\n"); + pr_err("PCIe ERR_DR register: 0x%08x\n", err_detect); + pr_err("PCIe ERR_CAP_STAT register: 0x%08x\n", + in_be32(pdata->pci_vbase + MPC85XX_PCI_GAS_TIMR)); + pr_err("PCIe ERR_CAP_R0 register: 0x%08x\n", + in_be32(pdata->pci_vbase + MPC85XX_PCIE_ERR_CAP_R0)); + pr_err("PCIe ERR_CAP_R1 register: 0x%08x\n", + in_be32(pdata->pci_vbase + MPC85XX_PCIE_ERR_CAP_R1)); + pr_err("PCIe ERR_CAP_R2 register: 0x%08x\n", + in_be32(pdata->pci_vbase + MPC85XX_PCIE_ERR_CAP_R2)); + pr_err("PCIe ERR_CAP_R3 register: 0x%08x\n", + in_be32(pdata->pci_vbase + MPC85XX_PCIE_ERR_CAP_R3)); + + /* clear error bits */ + out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_DR, err_detect); +} + +static int mpc85xx_pcie_find_capability(struct device_node *np) +{ + struct pci_controller *hose; + + if (!np) + return -EINVAL; + + hose = pci_find_hose_for_OF_device(np); + + return early_find_capability(hose, 0, 0, PCI_CAP_ID_EXP); +} + static irqreturn_t mpc85xx_pci_isr(int irq, void *dev_id) { struct edac_pci_ctl_info *pci = dev_id; @@ -207,7 +245,10 @@ static irqreturn_t mpc85xx_pci_isr(int irq, void *dev_id) if (!err_detect) return IRQ_NONE; - mpc85xx_pci_check(pci); + if (pdata->is_pcie) + mpc85xx_pcie_check(pci); + else + mpc85xx_pci_check(pci); return IRQ_HANDLED; } @@ -239,14 +280,22 @@ int mpc85xx_pci_err_probe(struct platform_device *op) pdata = pci->pvt_info; pdata->name = "mpc85xx_pci_err"; pdata->irq = NO_IRQ; + + if (mpc85xx_pcie_find_capability(op->dev.of_node) > 0) + pdata->is_pcie = true; + dev_set_drvdata(&op->dev, pci); pci->dev = &op->dev; pci->mod_name = EDAC_MOD_STR; pci->ctl_name = pdata->name; pci->dev_name = dev_name(&op->dev); - if (edac_op_state == EDAC_OPSTATE_POLL) - pci->edac_check = mpc85xx_pci_check; + if (edac_op_state == EDAC_OPSTATE_POLL) { + if (pdata->is_pcie) + pci->edac_check = mpc85xx_pcie_check; + else + pci->edac_check = mpc85xx_pci_check; + } pdata->edac_idx = edac_pci_idx++; @@ -275,16 +324,26 @@ int mpc85xx_pci_err_probe(struct platform_device *op) goto err; } - orig_pci_err_cap_dr = - in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_CAP_DR); + if (pdata->is_pcie) { + orig_pci_err_cap_dr = + in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_ADDR); + out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_ADDR, ~0); + orig_pci_err_en = + in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN); + out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN, 0); + } else { + orig_pci_err_cap_dr = + in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_CAP_DR); - /* PCI master abort is expected during config cycles */ - out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_CAP_DR, 0x40); + /* PCI master abort is expected during config cycles */ + out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_CAP_DR, 0x40); - orig_pci_err_en = in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN); + orig_pci_err_en = + in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN); - /* disable master abort reporting */ - out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN, ~0x40); + /* disable master abort reporting */ + out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN, ~0x40); + } /* clear error bits */ out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_DR, ~0); @@ -297,7 +356,8 @@ int mpc85xx_pci_err_probe(struct platform_device *op) if (edac_op_state == EDAC_OPSTATE_INT) { pdata->irq = irq_of_parse_and_map(op->dev.of_node, 0); res = devm_request_irq(&op->dev, pdata->irq, - mpc85xx_pci_isr, IRQF_DISABLED, + mpc85xx_pci_isr, + IRQF_DISABLED | IRQF_SHARED, "[EDAC] PCI err", pci); if (res < 0) { printk(KERN_ERR @@ -312,6 +372,22 @@ int mpc85xx_pci_err_probe(struct platform_device *op) pdata->irq); } + if (pdata->is_pcie) { + /* + * Enable all PCIe error interrupt & error detect except invalid + * PEX_CONFIG_ADDR/PEX_CONFIG_DATA access interrupt generation + * enable bit and invalid PEX_CONFIG_ADDR/PEX_CONFIG_DATA access + * detection enable bit. Because PCIe bus code to initialize and + * configure these PCIe devices on booting will use some invalid + * PEX_CONFIG_ADDR/PEX_CONFIG_DATA, edac driver prints the much + * notice information. So disable this detect to fix ugly print. + */ + out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EN, ~0 + & ~PEX_ERR_ICCAIE_EN_BIT); + out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_ADDR, 0 + | PEX_ERR_ICCAD_DISR_BIT); + } + devres_remove_group(&op->dev, mpc85xx_pci_err_probe); edac_dbg(3, "success\n"); printk(KERN_INFO EDAC_MOD_STR " PCI err registered\n"); diff --git a/drivers/edac/mpc85xx_edac.h b/drivers/edac/mpc85xx_edac.h index 932016f2cf06..8c6256436227 100644 --- a/drivers/edac/mpc85xx_edac.h +++ b/drivers/edac/mpc85xx_edac.h @@ -134,13 +134,19 @@ #define MPC85XX_PCI_ERR_DR 0x0000 #define MPC85XX_PCI_ERR_CAP_DR 0x0004 #define MPC85XX_PCI_ERR_EN 0x0008 +#define PEX_ERR_ICCAIE_EN_BIT 0x00020000 #define MPC85XX_PCI_ERR_ATTRIB 0x000c #define MPC85XX_PCI_ERR_ADDR 0x0010 +#define PEX_ERR_ICCAD_DISR_BIT 0x00020000 #define MPC85XX_PCI_ERR_EXT_ADDR 0x0014 #define MPC85XX_PCI_ERR_DL 0x0018 #define MPC85XX_PCI_ERR_DH 0x001c #define MPC85XX_PCI_GAS_TIMR 0x0020 #define MPC85XX_PCI_PCIX_TIMR 0x0024 +#define MPC85XX_PCIE_ERR_CAP_R0 0x0028 +#define MPC85XX_PCIE_ERR_CAP_R1 0x002c +#define MPC85XX_PCIE_ERR_CAP_R2 0x0030 +#define MPC85XX_PCIE_ERR_CAP_R3 0x0034 struct mpc85xx_mc_pdata { char *name; @@ -158,6 +164,7 @@ struct mpc85xx_l2_pdata { struct mpc85xx_pci_pdata { char *name; + bool is_pcie; int edac_idx; void __iomem *pci_vbase; int irq; From cdf8d75818e3cd30746d38f5fbcdb3745fd267a3 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 14 Nov 2013 10:14:37 +0000 Subject: [PATCH 037/981] metag: dma: remove dead code in dma_alloc_init() Meta has 2 levels of page table so the pmd folds into the pud which folds into the pgd. Therefore the !pmd check in dma_alloc_init() is dead code since it essentially checks whether: (init_mm->pgd + 0x770) == 0 Remove the check. Reported-by: Chen Gang Signed-off-by: James Hogan --- arch/metag/kernel/dma.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/metag/kernel/dma.c b/arch/metag/kernel/dma.c index db589ad5dbc4..c700d625067a 100644 --- a/arch/metag/kernel/dma.c +++ b/arch/metag/kernel/dma.c @@ -399,11 +399,6 @@ static int __init dma_alloc_init(void) pgd = pgd_offset(&init_mm, CONSISTENT_START); pud = pud_alloc(&init_mm, pgd, CONSISTENT_START); pmd = pmd_alloc(&init_mm, pud, CONSISTENT_START); - if (!pmd) { - pr_err("%s: no pmd tables\n", __func__); - ret = -ENOMEM; - break; - } WARN_ON(!pmd_none(*pmd)); pte = pte_alloc_kernel(pmd, CONSISTENT_START); From a4df02a217e9787a4b967197d9d9030c3e3c1088 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 25 Jun 2013 21:15:24 +0200 Subject: [PATCH 038/981] m68k: Mark functions only called from setup_arch() __init Some functions that are only called (indirectly) from setup_arch() lack __init annotations. Signed-off-by: Geert Uytterhoeven --- arch/m68k/amiga/config.c | 2 +- arch/m68k/apollo/config.c | 10 +++++----- arch/m68k/bvme6000/config.c | 2 +- arch/m68k/emu/natfeat.c | 3 ++- arch/m68k/kernel/setup_mm.c | 4 ++-- arch/m68k/mac/psc.c | 2 +- arch/m68k/mvme147/config.c | 2 +- arch/m68k/mvme16x/config.c | 2 +- arch/m68k/q40/config.c | 4 ++-- arch/m68k/sun3/dvma.c | 6 ++---- arch/m68k/sun3/mmu_emu.c | 3 ++- arch/m68k/sun3/sun3dvma.c | 3 ++- 12 files changed, 22 insertions(+), 21 deletions(-) diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c index b819390e29cd..93ab423758da 100644 --- a/arch/m68k/amiga/config.c +++ b/arch/m68k/amiga/config.c @@ -140,7 +140,7 @@ static struct resource ram_resource[NUM_MEMINFO]; * Parse an Amiga-specific record in the bootinfo */ -int amiga_parse_bootinfo(const struct bi_record *record) +int __init amiga_parse_bootinfo(const struct bi_record *record) { int unknown = 0; const unsigned long *data = record->data; diff --git a/arch/m68k/apollo/config.c b/arch/m68k/apollo/config.c index 3ea56b90e718..2bdde9685a29 100644 --- a/arch/m68k/apollo/config.c +++ b/arch/m68k/apollo/config.c @@ -1,3 +1,4 @@ +#include #include #include #include @@ -43,8 +44,8 @@ static const char *apollo_models[] = { [APOLLO_DN4500-APOLLO_DN3000] = "DN4500 (Roadrunner)" }; -int apollo_parse_bootinfo(const struct bi_record *record) { - +int __init apollo_parse_bootinfo(const struct bi_record *record) +{ int unknown = 0; const unsigned long *data = record->data; @@ -60,9 +61,8 @@ int apollo_parse_bootinfo(const struct bi_record *record) { return unknown; } -void dn_setup_model(void) { - - +static void __init dn_setup_model(void) +{ printk("Apollo hardware found: "); printk("[%s]\n", apollo_models[apollo_model - APOLLO_DN3000]); diff --git a/arch/m68k/bvme6000/config.c b/arch/m68k/bvme6000/config.c index 8943aa4c18e6..1b9e31e0130f 100644 --- a/arch/m68k/bvme6000/config.c +++ b/arch/m68k/bvme6000/config.c @@ -50,7 +50,7 @@ void bvme6000_set_vectors (void); static irq_handler_t tick_handler; -int bvme6000_parse_bootinfo(const struct bi_record *bi) +int __init bvme6000_parse_bootinfo(const struct bi_record *bi) { if (bi->tag == BI_VME_TYPE) return 0; diff --git a/arch/m68k/emu/natfeat.c b/arch/m68k/emu/natfeat.c index 121a6660ad4e..71b78ecee75c 100644 --- a/arch/m68k/emu/natfeat.c +++ b/arch/m68k/emu/natfeat.c @@ -9,6 +9,7 @@ * the GNU General Public License (GPL), incorporated herein by reference. */ +#include #include #include #include @@ -70,7 +71,7 @@ static void nf_poweroff(void) nf_call(id); } -void nf_init(void) +void __init nf_init(void) { unsigned long id, version; char buf[256]; diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c index e67e53159573..99aa9887d5a8 100644 --- a/arch/m68k/kernel/setup_mm.c +++ b/arch/m68k/kernel/setup_mm.c @@ -74,9 +74,9 @@ unsigned long m68k_memoffset; struct mem_info m68k_memory[NUM_MEMINFO]; EXPORT_SYMBOL(m68k_memory); -struct mem_info m68k_ramdisk; +static struct mem_info m68k_ramdisk __initdata; -static char m68k_command_line[CL_SIZE]; +static char m68k_command_line[CL_SIZE] __initdata; void (*mach_sched_init) (irq_handler_t handler) __initdata = NULL; /* machine dependent irq functions */ diff --git a/arch/m68k/mac/psc.c b/arch/m68k/mac/psc.c index 6f026fc302fa..cc9f3f097be7 100644 --- a/arch/m68k/mac/psc.c +++ b/arch/m68k/mac/psc.c @@ -54,7 +54,7 @@ static void psc_debug_dump(void) * expanded to cover what I think are the other 7 channels. */ -static void psc_dma_die_die_die(void) +static __init void psc_dma_die_die_die(void) { int i; diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c index 1c6262803b94..5c99154c67c8 100644 --- a/arch/m68k/mvme147/config.c +++ b/arch/m68k/mvme147/config.c @@ -51,7 +51,7 @@ static int bcd2int (unsigned char b); irq_handler_t tick_handler; -int mvme147_parse_bootinfo(const struct bi_record *bi) +int __init mvme147_parse_bootinfo(const struct bi_record *bi) { if (bi->tag == BI_VME_TYPE || bi->tag == BI_VME_BRDINFO) return 0; diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c index 080a342458a1..60d8a1bc837d 100644 --- a/arch/m68k/mvme16x/config.c +++ b/arch/m68k/mvme16x/config.c @@ -60,7 +60,7 @@ unsigned short mvme16x_config; EXPORT_SYMBOL(mvme16x_config); -int mvme16x_parse_bootinfo(const struct bi_record *bi) +int __init mvme16x_parse_bootinfo(const struct bi_record *bi) { if (bi->tag == BI_VME_TYPE || bi->tag == BI_VME_BRDINFO) return 0; diff --git a/arch/m68k/q40/config.c b/arch/m68k/q40/config.c index 078bb744b5fe..e90fe903613e 100644 --- a/arch/m68k/q40/config.c +++ b/arch/m68k/q40/config.c @@ -154,7 +154,7 @@ static unsigned int serports[] = 0x3f8,0x2f8,0x3e8,0x2e8,0 }; -static void q40_disable_irqs(void) +static void __init q40_disable_irqs(void) { unsigned i, j; @@ -198,7 +198,7 @@ void __init config_q40(void) } -int q40_parse_bootinfo(const struct bi_record *rec) +int __init q40_parse_bootinfo(const struct bi_record *rec) { return 1; } diff --git a/arch/m68k/sun3/dvma.c b/arch/m68k/sun3/dvma.c index d522eaab4551..d95506e06c2a 100644 --- a/arch/m68k/sun3/dvma.c +++ b/arch/m68k/sun3/dvma.c @@ -7,6 +7,7 @@ * */ +#include #include #include #include @@ -62,10 +63,7 @@ int dvma_map_iommu(unsigned long kaddr, unsigned long baddr, } -void sun3_dvma_init(void) +void __init sun3_dvma_init(void) { - memset(ptelist, 0, sizeof(ptelist)); - - } diff --git a/arch/m68k/sun3/mmu_emu.c b/arch/m68k/sun3/mmu_emu.c index 8edc510a21be..3f258e230ba5 100644 --- a/arch/m68k/sun3/mmu_emu.c +++ b/arch/m68k/sun3/mmu_emu.c @@ -6,6 +6,7 @@ ** Started 1/16/98 @ 2:22 am */ +#include #include #include #include @@ -122,7 +123,7 @@ void print_pte_vaddr (unsigned long vaddr) /* * Initialise the MMU emulator. */ -void mmu_emu_init(unsigned long bootmem_end) +void __init mmu_emu_init(unsigned long bootmem_end) { unsigned long seg, num; int i,j; diff --git a/arch/m68k/sun3/sun3dvma.c b/arch/m68k/sun3/sun3dvma.c index cab54482ca34..ca57966ec3a2 100644 --- a/arch/m68k/sun3/sun3dvma.c +++ b/arch/m68k/sun3/sun3dvma.c @@ -6,6 +6,7 @@ * Contains common routines for sun3/sun3x DVMA management. */ +#include #include #include #include @@ -245,7 +246,7 @@ static inline int free_baddr(unsigned long baddr) } -void dvma_init(void) +void __init dvma_init(void) { struct hole *hole; From 7b3e8de9b3e9c0ffd5cb1600e6075bede4272868 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 25 Jun 2013 20:33:44 +0200 Subject: [PATCH 039/981] m68k/sun3: Dynamically allocate the table to track IOMMU use As Sun 3 kernels cannot be multi-platform due to the different Sun 3 MMU type, it made sense to statically allocate the table to track IOMMU use. However, Sun 3x kernels can be multi-platform. Furthermore, Sun 3x uses a larger table than Sun 3 (8192 bytes instead of 512 bytes). Hence switch to dynamic allocation of this table using the bootmem allocator to avoid wasting 8192 bytes when not running on a Sun 3x. As this allocator returns zeroed memory, there's no need to explicitly initialize the table to zeroes. Signed-off-by: Geert Uytterhoeven --- arch/m68k/sun3/sun3dvma.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/m68k/sun3/sun3dvma.c b/arch/m68k/sun3/sun3dvma.c index ca57966ec3a2..b37521a5259d 100644 --- a/arch/m68k/sun3/sun3dvma.c +++ b/arch/m68k/sun3/sun3dvma.c @@ -6,6 +6,7 @@ * Contains common routines for sun3/sun3x DVMA management. */ +#include #include #include #include @@ -31,7 +32,7 @@ static inline void dvma_unmap_iommu(unsigned long a, int b) extern void sun3_dvma_init(void); #endif -static unsigned long iommu_use[IOMMU_TOTAL_ENTRIES]; +static unsigned long *iommu_use; #define dvma_index(baddr) ((baddr - DVMA_START) >> DVMA_PAGE_SHIFT) @@ -266,7 +267,7 @@ void __init dvma_init(void) list_add(&(hole->list), &hole_list); - memset(iommu_use, 0, sizeof(iommu_use)); + iommu_use = alloc_bootmem(IOMMU_TOTAL_ENTRIES * sizeof(unsigned long)); dvma_unmap_iommu(DVMA_START, DVMA_SIZE); From f16b89bc3cdf11266b657f2f74f4db255e77b76c Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 26 Jun 2013 13:18:09 +0200 Subject: [PATCH 040/981] m68k/mac: Fix comment about iop_*_present flags setup timing This is no longer done from iop_init() Signed-off-by: Geert Uytterhoeven --- arch/m68k/mac/iop.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/m68k/mac/iop.c b/arch/m68k/mac/iop.c index 7d8d46127ad9..ed0d9b0473ea 100644 --- a/arch/m68k/mac/iop.c +++ b/arch/m68k/mac/iop.c @@ -118,9 +118,9 @@ /*#define DEBUG_IOP*/ -/* Set to non-zero if the IOPs are present. Set by iop_init() */ +/* Non-zero if the IOPs are present */ -int iop_scc_present,iop_ism_present; +int iop_scc_present, iop_ism_present; /* structure for tracking channel listeners */ From 51b9310f0a33d5280a93228475fa95e38fbcba36 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 17 Sep 2013 09:04:34 +0200 Subject: [PATCH 041/981] m68k/defconfig: Make NFS_V4 modular instead of builtin This reduces the kernel image size by ca. 100 KiB, while still allowing NFS root. Signed-off-by: Geert Uytterhoeven --- arch/m68k/configs/amiga_defconfig | 2 +- arch/m68k/configs/apollo_defconfig | 2 +- arch/m68k/configs/atari_defconfig | 2 +- arch/m68k/configs/bvme6000_defconfig | 2 +- arch/m68k/configs/hp300_defconfig | 2 +- arch/m68k/configs/mac_defconfig | 2 +- arch/m68k/configs/multi_defconfig | 2 +- arch/m68k/configs/mvme147_defconfig | 2 +- arch/m68k/configs/mvme16x_defconfig | 2 +- arch/m68k/configs/q40_defconfig | 2 +- arch/m68k/configs/sun3_defconfig | 2 +- arch/m68k/configs/sun3x_defconfig | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 19325e117eea..04432b7ce157 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -385,7 +385,7 @@ CONFIG_QNX6FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m CONFIG_NFS_FS=y -CONFIG_NFS_V4=y +CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index 14dc6ccda7f4..a018fd7ed7bc 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -342,7 +342,7 @@ CONFIG_QNX6FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m CONFIG_NFS_FS=y -CONFIG_NFS_V4=y +CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 6d5370c914b2..fa33639b4107 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -360,7 +360,7 @@ CONFIG_QNX6FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m CONFIG_NFS_FS=y -CONFIG_NFS_V4=y +CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index c015ddb6fd80..ca310d586689 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -334,7 +334,7 @@ CONFIG_QNX6FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m CONFIG_NFS_FS=y -CONFIG_NFS_V4=y +CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index ec7382d8afff..aa98958c230e 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -344,7 +344,7 @@ CONFIG_QNX6FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m CONFIG_NFS_FS=y -CONFIG_NFS_V4=y +CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 7d46fbec7042..ac2c153f8a4c 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -367,7 +367,7 @@ CONFIG_QNX6FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m CONFIG_NFS_FS=y -CONFIG_NFS_V4=y +CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index b17a8837f0e1..61cdb096ca43 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -445,7 +445,7 @@ CONFIG_QNX6FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m CONFIG_NFS_FS=y -CONFIG_NFS_V4=y +CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 5586c6529fce..f89613f9c82b 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -334,7 +334,7 @@ CONFIG_QNX6FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m CONFIG_NFS_FS=y -CONFIG_NFS_V4=y +CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index e5e8262bbacd..d086acd6aa26 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -334,7 +334,7 @@ CONFIG_QNX6FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m CONFIG_NFS_FS=y -CONFIG_NFS_V4=y +CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index be1496ed9b66..3662899c0f88 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -358,7 +358,7 @@ CONFIG_QNX6FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m CONFIG_NFS_FS=y -CONFIG_NFS_V4=y +CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 54674d61e001..bb87f17ef477 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -336,7 +336,7 @@ CONFIG_QNX6FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m CONFIG_NFS_FS=y -CONFIG_NFS_V4=y +CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index 832d9539f441..b75a0b177b24 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -336,7 +336,7 @@ CONFIG_QNX6FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m CONFIG_NFS_FS=y -CONFIG_NFS_V4=y +CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m From 6074a13934202c7a68c77fd5f37da8dce731f181 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 26 Jun 2013 13:15:58 +0200 Subject: [PATCH 042/981] m68k/setup: Use pr_*() and __func__ instead of plain printk() Signed-off-by: Geert Uytterhoeven --- arch/m68k/kernel/setup_mm.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c index 99aa9887d5a8..9f16a15fa287 100644 --- a/arch/m68k/kernel/setup_mm.c +++ b/arch/m68k/kernel/setup_mm.c @@ -161,7 +161,8 @@ static void __init m68k_parse_bootinfo(const struct bi_record *record) m68k_memory[m68k_num_memory].size = data[1]; m68k_num_memory++; } else - printk("m68k_parse_bootinfo: too many memory chunks\n"); + pr_warn("%s: too many memory chunks\n", + __func__); break; case BI_RAMDISK: @@ -197,8 +198,8 @@ static void __init m68k_parse_bootinfo(const struct bi_record *record) unknown = 1; } if (unknown) - printk("m68k_parse_bootinfo: unknown tag 0x%04x ignored\n", - record->tag); + pr_warn("%s: unknown tag 0x%04x ignored\n", __func__, + record->tag); record = (struct bi_record *)((unsigned long)record + record->size); } @@ -206,8 +207,8 @@ static void __init m68k_parse_bootinfo(const struct bi_record *record) m68k_realnum_memory = m68k_num_memory; #ifdef CONFIG_SINGLE_MEMORY_CHUNK if (m68k_num_memory > 1) { - printk("Ignoring last %i chunks of physical memory\n", - (m68k_num_memory - 1)); + pr_warn("%s: ignoring last %i chunks of physical memory\n", + __func__, (m68k_num_memory - 1)); m68k_num_memory = 1; } #endif @@ -247,7 +248,7 @@ void __init setup_arch(char **cmdline_p) asm (".chip 68060; movec %%pcr,%0; .chip 68k" : "=d" (pcr)); if (((pcr >> 8) & 0xff) <= 5) { - printk("Enabling workaround for errata I14\n"); + pr_warn("Enabling workaround for errata I14\n"); asm (".chip 68060; movec %0,%%pcr; .chip 68k" : : "d" (pcr | 0x20)); } @@ -353,7 +354,7 @@ void __init setup_arch(char **cmdline_p) BOOTMEM_DEFAULT); initrd_start = (unsigned long)phys_to_virt(m68k_ramdisk.addr); initrd_end = initrd_start + m68k_ramdisk.size; - printk("initrd: %08lx - %08lx\n", initrd_start, initrd_end); + pr_info("initrd: %08lx - %08lx\n", initrd_start, initrd_end); } #endif @@ -538,9 +539,9 @@ void check_bugs(void) { #ifndef CONFIG_M68KFPU_EMU if (m68k_fputype == 0) { - printk(KERN_EMERG "*** YOU DO NOT HAVE A FLOATING POINT UNIT, " + pr_emerg("*** YOU DO NOT HAVE A FLOATING POINT UNIT, " "WHICH IS REQUIRED BY LINUX/M68K ***\n"); - printk(KERN_EMERG "Upgrade your hardware or join the FPU " + pr_emerg("Upgrade your hardware or join the FPU " "emulation project\n"); panic("no FPU"); } From 1ea636eb68322198e08aebdede141244d98e8299 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 16 Aug 2012 17:05:43 +0200 Subject: [PATCH 043/981] Documentation/zorro.txt: Update path to arch-specific header files - Arch-specific headers were moved to arch//include/asm, - APUS support was dropped a long time ago Signed-off-by: Geert Uytterhoeven --- Documentation/zorro.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/zorro.txt b/Documentation/zorro.txt index d5829d14774a..d17670400564 100644 --- a/Documentation/zorro.txt +++ b/Documentation/zorro.txt @@ -95,7 +95,7 @@ The treatment of these regions depends on the type of Zorro space: ------------- linux/include/linux/zorro.h -linux/include/asm-{m68k,ppc}/zorro.h +linux/arch/m68k/include/asm/zorro.h linux/include/linux/zorro_ids.h linux/drivers/zorro /proc/bus/zorro From c293738e6d8dfb9c941759855b5161fde449644d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sat, 29 Jun 2013 10:40:20 +0200 Subject: [PATCH 044/981] zorro: Do not allocate zorro_autocon[] statically Currently the array of Zorro devices is allocated statically, wasting up to 4.5 KiB when running an Amiga or multi-platform kernel on a machine with no or a handful of Zorro expansion cards. Convert it to conditional dynamic memory allocation to fix this. amiga_parse_bootinfo() still needs to store some information about the detected Zorro devices, at a time even the bootmem allocator is not yet available. This is now handled using a much smaller array (typically less than 0.5 KiB), which is __initdata and thus freed later. Signed-off-by: Geert Uytterhoeven --- arch/m68k/amiga/config.c | 6 +++--- arch/m68k/amiga/platform.c | 4 ++-- drivers/zorro/zorro.c | 19 ++++++++++++++++--- include/linux/zorro.h | 18 +++++++++++++++++- 4 files changed, 38 insertions(+), 9 deletions(-) diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c index 93ab423758da..d956ddbfebe1 100644 --- a/arch/m68k/amiga/config.c +++ b/arch/m68k/amiga/config.c @@ -174,12 +174,12 @@ int __init amiga_parse_bootinfo(const struct bi_record *record) #ifdef CONFIG_ZORRO if (zorro_num_autocon < ZORRO_NUM_AUTO) { const struct ConfigDev *cd = (struct ConfigDev *)data; - struct zorro_dev *dev = &zorro_autocon[zorro_num_autocon++]; + struct zorro_dev_init *dev = &zorro_autocon_init[zorro_num_autocon++]; dev->rom = cd->cd_Rom; dev->slotaddr = cd->cd_SlotAddr; dev->slotsize = cd->cd_SlotSize; - dev->resource.start = (unsigned long)cd->cd_BoardAddr; - dev->resource.end = dev->resource.start + cd->cd_BoardSize - 1; + dev->boardaddr = (u32)cd->cd_BoardAddr; + dev->boardsize = cd->cd_BoardSize; } else printk("amiga_parse_bootinfo: too many AutoConfig devices\n"); #endif /* CONFIG_ZORRO */ diff --git a/arch/m68k/amiga/platform.c b/arch/m68k/amiga/platform.c index dacd9f911f71..7847b2b1b5b6 100644 --- a/arch/m68k/amiga/platform.c +++ b/arch/m68k/amiga/platform.c @@ -67,8 +67,8 @@ static int __init z_dev_present(zorro_id id) unsigned int i; for (i = 0; i < zorro_num_autocon; i++) - if (zorro_autocon[i].rom.er_Manufacturer == ZORRO_MANUF(id) && - zorro_autocon[i].rom.er_Product == ZORRO_PROD(id)) + if (zorro_autocon_init[i].rom.er_Manufacturer == ZORRO_MANUF(id) && + zorro_autocon_init[i].rom.er_Product == ZORRO_PROD(id)) return 1; return 0; diff --git a/drivers/zorro/zorro.c b/drivers/zorro/zorro.c index 858c9714b2f3..10c7f77aec08 100644 --- a/drivers/zorro/zorro.c +++ b/drivers/zorro/zorro.c @@ -29,7 +29,8 @@ */ unsigned int zorro_num_autocon; -struct zorro_dev zorro_autocon[ZORRO_NUM_AUTO]; +struct zorro_dev_init zorro_autocon_init[ZORRO_NUM_AUTO] __initdata; +struct zorro_dev *zorro_autocon; /* @@ -38,6 +39,7 @@ struct zorro_dev zorro_autocon[ZORRO_NUM_AUTO]; struct zorro_bus { struct device dev; + struct zorro_dev devices[0]; }; @@ -125,16 +127,20 @@ static struct resource __init *zorro_find_parent_resource( static int __init amiga_zorro_probe(struct platform_device *pdev) { struct zorro_bus *bus; + struct zorro_dev_init *zi; struct zorro_dev *z; struct resource *r; unsigned int i; int error; /* Initialize the Zorro bus */ - bus = kzalloc(sizeof(*bus), GFP_KERNEL); + bus = kzalloc(sizeof(*bus) + + zorro_num_autocon * sizeof(bus->devices[0]), + GFP_KERNEL); if (!bus) return -ENOMEM; + zorro_autocon = bus->devices; bus->dev.parent = &pdev->dev; dev_set_name(&bus->dev, "zorro"); error = device_register(&bus->dev); @@ -151,15 +157,22 @@ static int __init amiga_zorro_probe(struct platform_device *pdev) /* First identify all devices ... */ for (i = 0; i < zorro_num_autocon; i++) { + zi = &zorro_autocon_init[i]; z = &zorro_autocon[i]; + + z->rom = zi->rom; z->id = (z->rom.er_Manufacturer<<16) | (z->rom.er_Product<<8); if (z->id == ZORRO_PROD_GVP_EPC_BASE) { /* GVP quirk */ - unsigned long magic = zorro_resource_start(z)+0x8000; + unsigned long magic = zi->boardaddr + 0x8000; z->id |= *(u16 *)ZTWO_VADDR(magic) & GVP_PRODMASK; } + z->slotaddr = zi->slotaddr; + z->slotsize = zi->slotsize; sprintf(z->name, "Zorro device %08x", z->id); zorro_name_device(z); + z->resource.start = zi->boardaddr; + z->resource.end = zi->boardaddr + zi->boardsize - 1; z->resource.name = z->name; r = zorro_find_parent_resource(pdev, z); error = request_resource(r, &z->resource); diff --git a/include/linux/zorro.h b/include/linux/zorro.h index dff42025649b..661cbd2a86ee 100644 --- a/include/linux/zorro.h +++ b/include/linux/zorro.h @@ -175,7 +175,23 @@ static inline struct zorro_driver *zorro_dev_driver(const struct zorro_dev *z) extern unsigned int zorro_num_autocon; /* # of autoconfig devices found */ -extern struct zorro_dev zorro_autocon[ZORRO_NUM_AUTO]; +extern struct zorro_dev *zorro_autocon; + + + /* + * Minimal information about a Zorro device, passed from bootinfo + * Only available temporarily, i.e. until initmem has been freed! + */ + +struct zorro_dev_init { + struct ExpansionRom rom; + u16 slotaddr; + u16 slotsize; + u32 boardaddr; + u32 boardsize; +}; + +extern struct zorro_dev_init zorro_autocon_init[ZORRO_NUM_AUTO] __initdata; /* From bd79014cd47b3a0fa59a7a17489d8ada56ecab9b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 30 Jun 2013 13:01:26 +0200 Subject: [PATCH 045/981] zorro: Don't fill in dummy names in zorro_name_device() If the device is not found in the database, it's not needed to fill in a dummy name. The caller of zorro_name_device() has already taken care of that to support CONFIG_ZORRO_NAMES=n. Signed-off-by: Geert Uytterhoeven --- drivers/zorro/names.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/zorro/names.c b/drivers/zorro/names.c index e8517c3d8e82..e44e04c4f29d 100644 --- a/drivers/zorro/names.c +++ b/drivers/zorro/names.c @@ -69,7 +69,6 @@ void __init zorro_name_device(struct zorro_dev *dev) } while (--i); /* Couldn't find either the manufacturer nor the product */ - sprintf(name, "Zorro device %08x", dev->id); return; match_manuf: { From 52182c758f7c9bc34dbee0dfdce13d356de50f03 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 17 May 2013 14:01:24 +0200 Subject: [PATCH 046/981] zorro: Refactor conditional handling of Zorro device name database Using an empty static inline function in the CONFIG_ZORRO_NAMES=n case allows to drop compilation of names.c. Signed-off-by: Geert Uytterhoeven --- drivers/zorro/Makefile | 3 ++- drivers/zorro/names.c | 10 ---------- drivers/zorro/zorro.h | 5 +++++ 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/zorro/Makefile b/drivers/zorro/Makefile index f62172603215..7dc5332ff984 100644 --- a/drivers/zorro/Makefile +++ b/drivers/zorro/Makefile @@ -2,8 +2,9 @@ # Makefile for the Zorro bus specific drivers. # -obj-$(CONFIG_ZORRO) += zorro.o zorro-driver.o zorro-sysfs.o names.o +obj-$(CONFIG_ZORRO) += zorro.o zorro-driver.o zorro-sysfs.o obj-$(CONFIG_PROC_FS) += proc.o +obj-$(CONFIG_ZORRO_NAMES) += names.o hostprogs-y := gen-devlist diff --git a/drivers/zorro/names.c b/drivers/zorro/names.c index e44e04c4f29d..6f3fd9903ac3 100644 --- a/drivers/zorro/names.c +++ b/drivers/zorro/names.c @@ -15,8 +15,6 @@ #include -#ifdef CONFIG_ZORRO_NAMES - struct zorro_prod_info { __u16 prod; unsigned short seen; @@ -97,11 +95,3 @@ void __init zorro_name_device(struct zorro_dev *dev) } } } - -#else - -void __init zorro_name_device(struct zorro_dev *dev) -{ -} - -#endif diff --git a/drivers/zorro/zorro.h b/drivers/zorro/zorro.h index b682d5ccd63f..34119fb4e560 100644 --- a/drivers/zorro/zorro.h +++ b/drivers/zorro/zorro.h @@ -1,4 +1,9 @@ +#ifdef CONFIG_ZORRO_NAMES extern void zorro_name_device(struct zorro_dev *z); +#else +static inline void zorro_name_device(struct zorro_dev *dev) { } +#endif + extern int zorro_create_sysfs_dev_files(struct zorro_dev *z); From 83b7bce3d390f15047e05a186bb4051536ee9dbc Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 16 Oct 2013 11:28:54 +0200 Subject: [PATCH 047/981] zorro: Let the driver core handle device enumeration Filling in dev_name of the Zorro bus type and dev.id of each device allows the driver core to enumerate devices, so we don't have to do that ourselves. This changes the names of devices in sysfs from "%02x" to "zorro%u". Note that filling in dev.id is also needed to support MFD Zorro devices. Signed-off-by: Geert Uytterhoeven --- drivers/zorro/zorro-driver.c | 11 ++++++----- drivers/zorro/zorro.c | 4 ++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/zorro/zorro-driver.c b/drivers/zorro/zorro-driver.c index ac1db7f1bcab..eacae1434b73 100644 --- a/drivers/zorro/zorro-driver.c +++ b/drivers/zorro/zorro-driver.c @@ -161,11 +161,12 @@ static int zorro_uevent(struct device *dev, struct kobj_uevent_env *env) } struct bus_type zorro_bus_type = { - .name = "zorro", - .match = zorro_bus_match, - .uevent = zorro_uevent, - .probe = zorro_device_probe, - .remove = zorro_device_remove, + .name = "zorro", + .dev_name = "zorro", + .match = zorro_bus_match, + .uevent = zorro_uevent, + .probe = zorro_device_probe, + .remove = zorro_device_remove, }; EXPORT_SYMBOL(zorro_bus_type); diff --git a/drivers/zorro/zorro.c b/drivers/zorro/zorro.c index 10c7f77aec08..450abf100f06 100644 --- a/drivers/zorro/zorro.c +++ b/drivers/zorro/zorro.c @@ -142,7 +142,7 @@ static int __init amiga_zorro_probe(struct platform_device *pdev) zorro_autocon = bus->devices; bus->dev.parent = &pdev->dev; - dev_set_name(&bus->dev, "zorro"); + dev_set_name(&bus->dev, zorro_bus_type.name); error = device_register(&bus->dev); if (error) { pr_err("Zorro: Error registering zorro_bus\n"); @@ -180,9 +180,9 @@ static int __init amiga_zorro_probe(struct platform_device *pdev) dev_err(&bus->dev, "Address space collision on device %s %pR\n", z->name, &z->resource); - dev_set_name(&z->dev, "%02x", i); z->dev.parent = &bus->dev; z->dev.bus = &zorro_bus_type; + z->dev.id = i; } /* ... then register them */ From 6112ea0862facaeaeab504ee01c0d04bcd22daaf Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 9 Jan 2011 11:03:43 +0100 Subject: [PATCH 048/981] zorro: ZTWO_VADDR() should return "void __iomem *" ZTWO_VADDR() converts from physical to virtual I/O addresses, so it should return "void __iomem *" instead of "unsigned long". This allows to drop several casts, but requires adding a few casts to accomodate legacy driver frameworks that store "unsigned long" I/O addresses. Signed-off-by: Geert Uytterhoeven --- arch/m68k/amiga/chipram.c | 2 +- arch/m68k/amiga/config.c | 2 +- arch/m68k/include/asm/amigahw.h | 2 +- drivers/block/z2ram.c | 4 ++-- drivers/ide/buddha.c | 2 +- drivers/net/ethernet/8390/hydra.c | 2 +- drivers/net/ethernet/8390/zorro8390.c | 4 ++-- drivers/net/ethernet/amd/a2065.c | 4 ++-- drivers/net/ethernet/amd/ariadne.c | 4 ++-- drivers/parport/parport_mfc3.c | 2 +- drivers/scsi/a2091.c | 2 +- drivers/scsi/a3000.c | 2 +- drivers/scsi/a4000t.c | 2 +- drivers/scsi/gvp11.c | 2 +- drivers/scsi/zorro7xx.c | 2 +- drivers/video/amifb.c | 2 +- drivers/video/cirrusfb.c | 4 ++-- 17 files changed, 22 insertions(+), 22 deletions(-) diff --git a/arch/m68k/amiga/chipram.c b/arch/m68k/amiga/chipram.c index 99449fbf9a72..ba03cec3f711 100644 --- a/arch/m68k/amiga/chipram.c +++ b/arch/m68k/amiga/chipram.c @@ -87,7 +87,7 @@ void *amiga_chip_alloc_res(unsigned long size, struct resource *res) atomic_sub(size, &chipavail); pr_debug("amiga_chip_alloc_res: returning %pR\n", res); - return (void *)ZTWO_VADDR(res->start); + return ZTWO_VADDR(res->start); } void amiga_chip_free(void *ptr) diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c index d956ddbfebe1..acd9c1640cfc 100644 --- a/arch/m68k/amiga/config.c +++ b/arch/m68k/amiga/config.c @@ -618,7 +618,7 @@ static int __init amiga_savekmsg_setup(char *arg) /* Just steal the block, the chipram allocator isn't functional yet */ amiga_chip_size -= SAVEKMSG_MAXMEM; - savekmsg = (void *)ZTWO_VADDR(CHIP_PHYSADDR + amiga_chip_size); + savekmsg = ZTWO_VADDR(CHIP_PHYSADDR + amiga_chip_size); savekmsg->magic1 = SAVEKMSG_MAGIC1; savekmsg->magic2 = SAVEKMSG_MAGIC2; savekmsg->magicptr = ZTWO_PADDR(savekmsg); diff --git a/arch/m68k/include/asm/amigahw.h b/arch/m68k/include/asm/amigahw.h index 7a19b5686a4a..3bef0b2ad945 100644 --- a/arch/m68k/include/asm/amigahw.h +++ b/arch/m68k/include/asm/amigahw.h @@ -266,7 +266,7 @@ struct CIA { #define zTwoBase (0x80000000) #define ZTWO_PADDR(x) (((unsigned long)(x))-zTwoBase) -#define ZTWO_VADDR(x) (((unsigned long)(x))+zTwoBase) +#define ZTWO_VADDR(x) ((void __iomem *)(((unsigned long)(x))+zTwoBase)) #define CUSTOM_PHYSADDR (0xdff000) #define amiga_custom ((*(volatile struct CUSTOM *)(zTwoBase+CUSTOM_PHYSADDR))) diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c index 5a95baf4b104..8b2a60cee3a0 100644 --- a/drivers/block/z2ram.c +++ b/drivers/block/z2ram.c @@ -116,8 +116,8 @@ get_z2ram( void ) if ( test_bit( i, zorro_unused_z2ram ) ) { z2_count++; - z2ram_map[ z2ram_size++ ] = - ZTWO_VADDR( Z2RAM_START ) + ( i << Z2RAM_CHUNKSHIFT ); + z2ram_map[z2ram_size++] = (unsigned long)ZTWO_VADDR(Z2RAM_START) + + (i << Z2RAM_CHUNKSHIFT); clear_bit( i, zorro_unused_z2ram ); } } diff --git a/drivers/ide/buddha.c b/drivers/ide/buddha.c index b1d38590ac01..46eaf58d881b 100644 --- a/drivers/ide/buddha.c +++ b/drivers/ide/buddha.c @@ -198,7 +198,7 @@ fail_base2: continue; } } - buddha_board = ZTWO_VADDR(board); + buddha_board = (unsigned long)ZTWO_VADDR(board); /* write to BUDDHA_IRQ_MR to enable the board IRQ */ /* X-Surf doesn't have this. IRQs are always on */ diff --git a/drivers/net/ethernet/8390/hydra.c b/drivers/net/ethernet/8390/hydra.c index fb3dd4399cf3..f615fdec0f1b 100644 --- a/drivers/net/ethernet/8390/hydra.c +++ b/drivers/net/ethernet/8390/hydra.c @@ -113,7 +113,7 @@ static const struct net_device_ops hydra_netdev_ops = { static int hydra_init(struct zorro_dev *z) { struct net_device *dev; - unsigned long board = ZTWO_VADDR(z->resource.start); + unsigned long board = (unsigned long)ZTWO_VADDR(z->resource.start); unsigned long ioaddr = board+HYDRA_NIC_BASE; const char name[] = "NE2000"; int start_page, stop_page; diff --git a/drivers/net/ethernet/8390/zorro8390.c b/drivers/net/ethernet/8390/zorro8390.c index 85ec4c2d2645..ae2a12b7db62 100644 --- a/drivers/net/ethernet/8390/zorro8390.c +++ b/drivers/net/ethernet/8390/zorro8390.c @@ -287,7 +287,7 @@ static const struct net_device_ops zorro8390_netdev_ops = { }; static int zorro8390_init(struct net_device *dev, unsigned long board, - const char *name, unsigned long ioaddr) + const char *name, void __iomem *ioaddr) { int i; int err; @@ -354,7 +354,7 @@ static int zorro8390_init(struct net_device *dev, unsigned long board, start_page = NESM_START_PG; stop_page = NESM_STOP_PG; - dev->base_addr = ioaddr; + dev->base_addr = (unsigned long)ioaddr; dev->irq = IRQ_AMIGA_PORTS; /* Install the Interrupt handler */ diff --git a/drivers/net/ethernet/amd/a2065.c b/drivers/net/ethernet/amd/a2065.c index 0866e7627433..f492a19328e5 100644 --- a/drivers/net/ethernet/amd/a2065.c +++ b/drivers/net/ethernet/amd/a2065.c @@ -713,8 +713,8 @@ static int a2065_init_one(struct zorro_dev *z, dev->dev_addr[3] = (z->rom.er_SerialNumber >> 16) & 0xff; dev->dev_addr[4] = (z->rom.er_SerialNumber >> 8) & 0xff; dev->dev_addr[5] = z->rom.er_SerialNumber & 0xff; - dev->base_addr = ZTWO_VADDR(base_addr); - dev->mem_start = ZTWO_VADDR(mem_start); + dev->base_addr = (unsigned long)ZTWO_VADDR(base_addr); + dev->mem_start = (unsigned long)ZTWO_VADDR(mem_start); dev->mem_end = dev->mem_start + A2065_RAM_SIZE; priv->ll = (volatile struct lance_regs *)dev->base_addr; diff --git a/drivers/net/ethernet/amd/ariadne.c b/drivers/net/ethernet/amd/ariadne.c index c178eb4c8166..33822cb69366 100644 --- a/drivers/net/ethernet/amd/ariadne.c +++ b/drivers/net/ethernet/amd/ariadne.c @@ -747,8 +747,8 @@ static int ariadne_init_one(struct zorro_dev *z, dev->dev_addr[3] = (z->rom.er_SerialNumber >> 16) & 0xff; dev->dev_addr[4] = (z->rom.er_SerialNumber >> 8) & 0xff; dev->dev_addr[5] = z->rom.er_SerialNumber & 0xff; - dev->base_addr = ZTWO_VADDR(base_addr); - dev->mem_start = ZTWO_VADDR(mem_start); + dev->base_addr = (unsigned long)ZTWO_VADDR(base_addr); + dev->mem_start = (unsigned long)ZTWO_VADDR(mem_start); dev->mem_end = dev->mem_start + ARIADNE_RAM_SIZE; dev->netdev_ops = &ariadne_netdev_ops; diff --git a/drivers/parport/parport_mfc3.c b/drivers/parport/parport_mfc3.c index 7578d79b3688..2f650f68af14 100644 --- a/drivers/parport/parport_mfc3.c +++ b/drivers/parport/parport_mfc3.c @@ -300,7 +300,7 @@ static int __init parport_mfc3_init(void) if (!request_mem_region(piabase, sizeof(struct pia), "PIA")) continue; - pp = (struct pia *)ZTWO_VADDR(piabase); + pp = ZTWO_VADDR(piabase); pp->crb = 0; pp->pddrb = 255; /* all data pins output */ pp->crb = PIA_DDR|32|8; diff --git a/drivers/scsi/a2091.c b/drivers/scsi/a2091.c index 30fa38a0ad39..9176bfbd5745 100644 --- a/drivers/scsi/a2091.c +++ b/drivers/scsi/a2091.c @@ -201,7 +201,7 @@ static int a2091_probe(struct zorro_dev *z, const struct zorro_device_id *ent) instance->irq = IRQ_AMIGA_PORTS; instance->unique_id = z->slotaddr; - regs = (struct a2091_scsiregs *)ZTWO_VADDR(z->resource.start); + regs = ZTWO_VADDR(z->resource.start); regs->DAWR = DAWR_A2091; wdregs.SASR = ®s->SASR; diff --git a/drivers/scsi/a3000.c b/drivers/scsi/a3000.c index c0f4f4290dd6..dd5b64726ddc 100644 --- a/drivers/scsi/a3000.c +++ b/drivers/scsi/a3000.c @@ -220,7 +220,7 @@ static int __init amiga_a3000_scsi_probe(struct platform_device *pdev) instance->irq = IRQ_AMIGA_PORTS; - regs = (struct a3000_scsiregs *)ZTWO_VADDR(res->start); + regs = ZTWO_VADDR(res->start); regs->DAWR = DAWR_A3000; wdregs.SASR = ®s->SASR; diff --git a/drivers/scsi/a4000t.c b/drivers/scsi/a4000t.c index 70c521f79f7c..f5a2ab41543b 100644 --- a/drivers/scsi/a4000t.c +++ b/drivers/scsi/a4000t.c @@ -56,7 +56,7 @@ static int __init amiga_a4000t_scsi_probe(struct platform_device *pdev) scsi_addr = res->start + A4000T_SCSI_OFFSET; /* Fill in the required pieces of hostdata */ - hostdata->base = (void __iomem *)ZTWO_VADDR(scsi_addr); + hostdata->base = ZTWO_VADDR(scsi_addr); hostdata->clock = 50; hostdata->chip710 = 1; hostdata->dmode_extra = DMODE_FC2; diff --git a/drivers/scsi/gvp11.c b/drivers/scsi/gvp11.c index 2203ac281103..3b6f83ffddc4 100644 --- a/drivers/scsi/gvp11.c +++ b/drivers/scsi/gvp11.c @@ -310,7 +310,7 @@ static int gvp11_probe(struct zorro_dev *z, const struct zorro_device_id *ent) if (!request_mem_region(address, 256, "wd33c93")) return -EBUSY; - regs = (struct gvp11_scsiregs *)(ZTWO_VADDR(address)); + regs = ZTWO_VADDR(address); error = check_wd33c93(regs); if (error) diff --git a/drivers/scsi/zorro7xx.c b/drivers/scsi/zorro7xx.c index cbf3476c68cd..aff31991aea9 100644 --- a/drivers/scsi/zorro7xx.c +++ b/drivers/scsi/zorro7xx.c @@ -104,7 +104,7 @@ static int zorro7xx_init_one(struct zorro_dev *z, if (ioaddr > 0x01000000) hostdata->base = ioremap(ioaddr, zorro_resource_len(z)); else - hostdata->base = (void __iomem *)ZTWO_VADDR(ioaddr); + hostdata->base = ZTWO_VADDR(ioaddr); hostdata->clock = 50; hostdata->chip710 = 1; diff --git a/drivers/video/amifb.c b/drivers/video/amifb.c index 0dac36ce09d6..518f790ef88a 100644 --- a/drivers/video/amifb.c +++ b/drivers/video/amifb.c @@ -3710,7 +3710,7 @@ default_chipset: if (!videomemory) { dev_warn(&pdev->dev, "Unable to map videomem cached writethrough\n"); - info->screen_base = (char *)ZTWO_VADDR(info->fix.smem_start); + info->screen_base = ZTWO_VADDR(info->fix.smem_start); } else info->screen_base = (char *)videomemory; diff --git a/drivers/video/cirrusfb.c b/drivers/video/cirrusfb.c index 5aab9b9dc210..d992aa5eb3f0 100644 --- a/drivers/video/cirrusfb.c +++ b/drivers/video/cirrusfb.c @@ -2256,7 +2256,7 @@ static int cirrusfb_zorro_register(struct zorro_dev *z, info->fix.mmio_start = regbase; cinfo->regbase = regbase > 16 * MB_ ? ioremap(regbase, 64 * 1024) - : (caddr_t)ZTWO_VADDR(regbase); + : ZTWO_VADDR(regbase); if (!cinfo->regbase) { dev_err(info->device, "Cannot map registers\n"); error = -EIO; @@ -2266,7 +2266,7 @@ static int cirrusfb_zorro_register(struct zorro_dev *z, info->fix.smem_start = rambase; info->screen_size = ramsize; info->screen_base = rambase > 16 * MB_ ? ioremap(rambase, ramsize) - : (caddr_t)ZTWO_VADDR(rambase); + : ZTWO_VADDR(rambase); if (!info->screen_base) { dev_err(info->device, "Cannot map video RAM\n"); error = -EIO; From 986ea58dfb52097999cc388880505fc7a10e4779 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 3 Oct 2013 10:45:36 +0200 Subject: [PATCH 049/981] zorro/UAPI: Disintegrate include/linux/zorro*.h The Zorro definitions and device IDs are used by bootstraps, hence they should be exported through UAPI. Unfortunately zorro.h was never marked for export when headers_install was introduced, so it was forgotten during the big UAPI disintegration. In addition, the removal of zorro_ids.h had been sneaked into commit 7e7a43c32a8970ea2bfc3d1af353dcb1a9237769 ("PCI: don't export device IDs to userspace") before, so it was also forgotten. Split off and export the Zorro definitions used by bootstraps. Signed-off-by: Geert Uytterhoeven --- Documentation/zorro.txt | 3 +- include/linux/zorro.h | 105 +------------------------ include/uapi/linux/Kbuild | 2 + include/uapi/linux/zorro.h | 113 +++++++++++++++++++++++++++ include/{ => uapi}/linux/zorro_ids.h | 0 5 files changed, 120 insertions(+), 103 deletions(-) create mode 100644 include/uapi/linux/zorro.h rename include/{ => uapi}/linux/zorro_ids.h (100%) diff --git a/Documentation/zorro.txt b/Documentation/zorro.txt index d17670400564..90a64d52bea2 100644 --- a/Documentation/zorro.txt +++ b/Documentation/zorro.txt @@ -95,8 +95,9 @@ The treatment of these regions depends on the type of Zorro space: ------------- linux/include/linux/zorro.h +linux/include/uapi/linux/zorro.h +linux/include/uapi/linux/zorro_ids.h linux/arch/m68k/include/asm/zorro.h -linux/include/linux/zorro_ids.h linux/drivers/zorro /proc/bus/zorro diff --git a/include/linux/zorro.h b/include/linux/zorro.h index 661cbd2a86ee..63fbba0740c2 100644 --- a/include/linux/zorro.h +++ b/include/linux/zorro.h @@ -11,107 +11,10 @@ #ifndef _LINUX_ZORRO_H #define _LINUX_ZORRO_H + +#include + #include - - - /* - * Each Zorro board has a 32-bit ID of the form - * - * mmmmmmmmmmmmmmmmppppppppeeeeeeee - * - * with - * - * mmmmmmmmmmmmmmmm 16-bit Manufacturer ID (assigned by CBM (sigh)) - * pppppppp 8-bit Product ID (assigned by manufacturer) - * eeeeeeee 8-bit Extended Product ID (currently only used - * for some GVP boards) - */ - - -#define ZORRO_MANUF(id) ((id) >> 16) -#define ZORRO_PROD(id) (((id) >> 8) & 0xff) -#define ZORRO_EPC(id) ((id) & 0xff) - -#define ZORRO_ID(manuf, prod, epc) \ - ((ZORRO_MANUF_##manuf << 16) | ((prod) << 8) | (epc)) - -typedef __u32 zorro_id; - - -/* Include the ID list */ -#include - - - /* - * GVP identifies most of its products through the 'extended product code' - * (epc). The epc has to be ANDed with the GVP_PRODMASK before the - * identification. - */ - -#define GVP_PRODMASK (0xf8) -#define GVP_SCSICLKMASK (0x01) - -enum GVP_flags { - GVP_IO = 0x01, - GVP_ACCEL = 0x02, - GVP_SCSI = 0x04, - GVP_24BITDMA = 0x08, - GVP_25BITDMA = 0x10, - GVP_NOBANK = 0x20, - GVP_14MHZ = 0x40, -}; - - -struct Node { - struct Node *ln_Succ; /* Pointer to next (successor) */ - struct Node *ln_Pred; /* Pointer to previous (predecessor) */ - __u8 ln_Type; - __s8 ln_Pri; /* Priority, for sorting */ - __s8 *ln_Name; /* ID string, null terminated */ -} __attribute__ ((packed)); - -struct ExpansionRom { - /* -First 16 bytes of the expansion ROM */ - __u8 er_Type; /* Board type, size and flags */ - __u8 er_Product; /* Product number, assigned by manufacturer */ - __u8 er_Flags; /* Flags */ - __u8 er_Reserved03; /* Must be zero ($ff inverted) */ - __u16 er_Manufacturer; /* Unique ID, ASSIGNED BY COMMODORE-AMIGA! */ - __u32 er_SerialNumber; /* Available for use by manufacturer */ - __u16 er_InitDiagVec; /* Offset to optional "DiagArea" structure */ - __u8 er_Reserved0c; - __u8 er_Reserved0d; - __u8 er_Reserved0e; - __u8 er_Reserved0f; -} __attribute__ ((packed)); - -/* er_Type board type bits */ -#define ERT_TYPEMASK 0xc0 -#define ERT_ZORROII 0xc0 -#define ERT_ZORROIII 0x80 - -/* other bits defined in er_Type */ -#define ERTB_MEMLIST 5 /* Link RAM into free memory list */ -#define ERTF_MEMLIST (1<<5) - -struct ConfigDev { - struct Node cd_Node; - __u8 cd_Flags; /* (read/write) */ - __u8 cd_Pad; /* reserved */ - struct ExpansionRom cd_Rom; /* copy of board's expansion ROM */ - void *cd_BoardAddr; /* where in memory the board was placed */ - __u32 cd_BoardSize; /* size of board in bytes */ - __u16 cd_SlotAddr; /* which slot number (PRIVATE) */ - __u16 cd_SlotSize; /* number of slots (PRIVATE) */ - void *cd_Driver; /* pointer to node of driver */ - struct ConfigDev *cd_NextCD; /* linked list of drivers to config */ - __u32 cd_Unused[4]; /* for whatever the driver wants */ -} __attribute__ ((packed)); - -#define ZORRO_NUM_AUTO 16 - -#ifdef __KERNEL__ - #include #include #include @@ -245,6 +148,4 @@ extern DECLARE_BITMAP(zorro_unused_z2ram, 128); #define Z2RAM_CHUNKSHIFT (16) -#endif /* __KERNEL__ */ - #endif /* _LINUX_ZORRO_H */ diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 33d2b8fe166d..3ce25b5d75a9 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -426,3 +426,5 @@ header-y += x25.h header-y += xattr.h header-y += xfrm.h header-y += hw_breakpoint.h +header-y += zorro.h +header-y += zorro_ids.h diff --git a/include/uapi/linux/zorro.h b/include/uapi/linux/zorro.h new file mode 100644 index 000000000000..36a033e23d31 --- /dev/null +++ b/include/uapi/linux/zorro.h @@ -0,0 +1,113 @@ +/* + * linux/zorro.h -- Amiga AutoConfig (Zorro) Bus Definitions + * + * Copyright (C) 1995--2003 Geert Uytterhoeven + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive + * for more details. + */ + +#ifndef _UAPI_LINUX_ZORRO_H +#define _UAPI_LINUX_ZORRO_H + +#include + + + /* + * Each Zorro board has a 32-bit ID of the form + * + * mmmmmmmmmmmmmmmmppppppppeeeeeeee + * + * with + * + * mmmmmmmmmmmmmmmm 16-bit Manufacturer ID (assigned by CBM (sigh)) + * pppppppp 8-bit Product ID (assigned by manufacturer) + * eeeeeeee 8-bit Extended Product ID (currently only used + * for some GVP boards) + */ + + +#define ZORRO_MANUF(id) ((id) >> 16) +#define ZORRO_PROD(id) (((id) >> 8) & 0xff) +#define ZORRO_EPC(id) ((id) & 0xff) + +#define ZORRO_ID(manuf, prod, epc) \ + ((ZORRO_MANUF_##manuf << 16) | ((prod) << 8) | (epc)) + +typedef __u32 zorro_id; + + +/* Include the ID list */ +#include + + + /* + * GVP identifies most of its products through the 'extended product code' + * (epc). The epc has to be ANDed with the GVP_PRODMASK before the + * identification. + */ + +#define GVP_PRODMASK (0xf8) +#define GVP_SCSICLKMASK (0x01) + +enum GVP_flags { + GVP_IO = 0x01, + GVP_ACCEL = 0x02, + GVP_SCSI = 0x04, + GVP_24BITDMA = 0x08, + GVP_25BITDMA = 0x10, + GVP_NOBANK = 0x20, + GVP_14MHZ = 0x40, +}; + + +struct Node { + struct Node *ln_Succ; /* Pointer to next (successor) */ + struct Node *ln_Pred; /* Pointer to previous (predecessor) */ + __u8 ln_Type; + __s8 ln_Pri; /* Priority, for sorting */ + __s8 *ln_Name; /* ID string, null terminated */ +} __packed; + +struct ExpansionRom { + /* -First 16 bytes of the expansion ROM */ + __u8 er_Type; /* Board type, size and flags */ + __u8 er_Product; /* Product number, assigned by manufacturer */ + __u8 er_Flags; /* Flags */ + __u8 er_Reserved03; /* Must be zero ($ff inverted) */ + __u16 er_Manufacturer; /* Unique ID, ASSIGNED BY COMMODORE-AMIGA! */ + __u32 er_SerialNumber; /* Available for use by manufacturer */ + __u16 er_InitDiagVec; /* Offset to optional "DiagArea" structure */ + __u8 er_Reserved0c; + __u8 er_Reserved0d; + __u8 er_Reserved0e; + __u8 er_Reserved0f; +} __packed; + +/* er_Type board type bits */ +#define ERT_TYPEMASK 0xc0 +#define ERT_ZORROII 0xc0 +#define ERT_ZORROIII 0x80 + +/* other bits defined in er_Type */ +#define ERTB_MEMLIST 5 /* Link RAM into free memory list */ +#define ERTF_MEMLIST (1<<5) + +struct ConfigDev { + struct Node cd_Node; + __u8 cd_Flags; /* (read/write) */ + __u8 cd_Pad; /* reserved */ + struct ExpansionRom cd_Rom; /* copy of board's expansion ROM */ + void *cd_BoardAddr; /* where in memory the board was placed */ + __u32 cd_BoardSize; /* size of board in bytes */ + __u16 cd_SlotAddr; /* which slot number (PRIVATE) */ + __u16 cd_SlotSize; /* number of slots (PRIVATE) */ + void *cd_Driver; /* pointer to node of driver */ + struct ConfigDev *cd_NextCD; /* linked list of drivers to config */ + __u32 cd_Unused[4]; /* for whatever the driver wants */ +} __packed; + +#define ZORRO_NUM_AUTO 16 + +#endif /* _UAPI_LINUX_ZORRO_H */ diff --git a/include/linux/zorro_ids.h b/include/uapi/linux/zorro_ids.h similarity index 100% rename from include/linux/zorro_ids.h rename to include/uapi/linux/zorro_ids.h From bd9ba8f40ee30edf31cc0845d8838bc43d172ef3 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 4 Oct 2013 09:38:53 +0200 Subject: [PATCH 050/981] zorro/UAPI: Use proper types (endianness/size) in Fix member definitions for non-native userspace handling: - All multi-byte values are big-endian, hence use __be*, - All pointers are 32-bit pointers under AmigaOS, but unused (except for cd_BoardAddr) under Linux, hence use __be32. Signed-off-by: Geert Uytterhoeven --- arch/m68k/amiga/config.c | 9 +++--- arch/m68k/amiga/platform.c | 9 ++++-- drivers/net/ethernet/amd/a2065.c | 9 ++++-- drivers/net/ethernet/amd/ariadne.c | 9 ++++-- drivers/zorro/proc.c | 10 ++++--- drivers/zorro/zorro-sysfs.c | 22 ++++++++++---- drivers/zorro/zorro.c | 4 ++- include/uapi/linux/zorro.h | 46 +++++++++++++++--------------- 8 files changed, 72 insertions(+), 46 deletions(-) diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c index acd9c1640cfc..65b5e937ebba 100644 --- a/arch/m68k/amiga/config.c +++ b/arch/m68k/amiga/config.c @@ -28,6 +28,7 @@ #include #include +#include #include #include #include @@ -176,10 +177,10 @@ int __init amiga_parse_bootinfo(const struct bi_record *record) const struct ConfigDev *cd = (struct ConfigDev *)data; struct zorro_dev_init *dev = &zorro_autocon_init[zorro_num_autocon++]; dev->rom = cd->cd_Rom; - dev->slotaddr = cd->cd_SlotAddr; - dev->slotsize = cd->cd_SlotSize; - dev->boardaddr = (u32)cd->cd_BoardAddr; - dev->boardsize = cd->cd_BoardSize; + dev->slotaddr = be16_to_cpu(cd->cd_SlotAddr); + dev->slotsize = be16_to_cpu(cd->cd_SlotSize); + dev->boardaddr = be32_to_cpu(cd->cd_BoardAddr); + dev->boardsize = be32_to_cpu(cd->cd_BoardSize); } else printk("amiga_parse_bootinfo: too many AutoConfig devices\n"); #endif /* CONFIG_ZORRO */ diff --git a/arch/m68k/amiga/platform.c b/arch/m68k/amiga/platform.c index 7847b2b1b5b6..d34029d7b058 100644 --- a/arch/m68k/amiga/platform.c +++ b/arch/m68k/amiga/platform.c @@ -13,6 +13,7 @@ #include #include +#include #ifdef CONFIG_ZORRO @@ -66,10 +67,12 @@ static int __init z_dev_present(zorro_id id) { unsigned int i; - for (i = 0; i < zorro_num_autocon; i++) - if (zorro_autocon_init[i].rom.er_Manufacturer == ZORRO_MANUF(id) && - zorro_autocon_init[i].rom.er_Product == ZORRO_PROD(id)) + for (i = 0; i < zorro_num_autocon; i++) { + const struct ExpansionRom *rom = &zorro_autocon_init[i].rom; + if (be16_to_cpu(rom->er_Manufacturer) == ZORRO_MANUF(id) && + rom->er_Product == ZORRO_PROD(id)) return 1; + } return 0; } diff --git a/drivers/net/ethernet/amd/a2065.c b/drivers/net/ethernet/amd/a2065.c index f492a19328e5..56139184b801 100644 --- a/drivers/net/ethernet/amd/a2065.c +++ b/drivers/net/ethernet/amd/a2065.c @@ -57,6 +57,7 @@ #include #include +#include #include #include #include @@ -678,6 +679,7 @@ static int a2065_init_one(struct zorro_dev *z, unsigned long base_addr = board + A2065_LANCE; unsigned long mem_start = board + A2065_RAM; struct resource *r1, *r2; + u32 serial; int err; r1 = request_mem_region(base_addr, sizeof(struct lance_regs), @@ -702,6 +704,7 @@ static int a2065_init_one(struct zorro_dev *z, r1->name = dev->name; r2->name = dev->name; + serial = be32_to_cpu(z->rom.er_SerialNumber); dev->dev_addr[0] = 0x00; if (z->id != ZORRO_PROD_AMERISTAR_A2065) { /* Commodore */ dev->dev_addr[1] = 0x80; @@ -710,9 +713,9 @@ static int a2065_init_one(struct zorro_dev *z, dev->dev_addr[1] = 0x00; dev->dev_addr[2] = 0x9f; } - dev->dev_addr[3] = (z->rom.er_SerialNumber >> 16) & 0xff; - dev->dev_addr[4] = (z->rom.er_SerialNumber >> 8) & 0xff; - dev->dev_addr[5] = z->rom.er_SerialNumber & 0xff; + dev->dev_addr[3] = (serial >> 16) & 0xff; + dev->dev_addr[4] = (serial >> 8) & 0xff; + dev->dev_addr[5] = serial & 0xff; dev->base_addr = (unsigned long)ZTWO_VADDR(base_addr); dev->mem_start = (unsigned long)ZTWO_VADDR(mem_start); dev->mem_end = dev->mem_start + A2065_RAM_SIZE; diff --git a/drivers/net/ethernet/amd/ariadne.c b/drivers/net/ethernet/amd/ariadne.c index 33822cb69366..b08101b31b8b 100644 --- a/drivers/net/ethernet/amd/ariadne.c +++ b/drivers/net/ethernet/amd/ariadne.c @@ -51,6 +51,7 @@ #include #include +#include #include #include #include @@ -718,6 +719,7 @@ static int ariadne_init_one(struct zorro_dev *z, struct resource *r1, *r2; struct net_device *dev; struct ariadne_private *priv; + u32 serial; int err; r1 = request_mem_region(base_addr, sizeof(struct Am79C960), "Am79C960"); @@ -741,12 +743,13 @@ static int ariadne_init_one(struct zorro_dev *z, r1->name = dev->name; r2->name = dev->name; + serial = be32_to_cpu(z->rom.er_SerialNumber); dev->dev_addr[0] = 0x00; dev->dev_addr[1] = 0x60; dev->dev_addr[2] = 0x30; - dev->dev_addr[3] = (z->rom.er_SerialNumber >> 16) & 0xff; - dev->dev_addr[4] = (z->rom.er_SerialNumber >> 8) & 0xff; - dev->dev_addr[5] = z->rom.er_SerialNumber & 0xff; + dev->dev_addr[3] = (serial >> 16) & 0xff; + dev->dev_addr[4] = (serial >> 8) & 0xff; + dev->dev_addr[5] = serial & 0xff; dev->base_addr = (unsigned long)ZTWO_VADDR(base_addr); dev->mem_start = (unsigned long)ZTWO_VADDR(mem_start); dev->mem_end = dev->mem_start + ARIADNE_RAM_SIZE; diff --git a/drivers/zorro/proc.c b/drivers/zorro/proc.c index ea1ce822a8e0..6ac2579da0eb 100644 --- a/drivers/zorro/proc.c +++ b/drivers/zorro/proc.c @@ -14,6 +14,8 @@ #include #include #include + +#include #include #include #include @@ -41,10 +43,10 @@ proc_bus_zorro_read(struct file *file, char __user *buf, size_t nbytes, loff_t * /* Construct a ConfigDev */ memset(&cd, 0, sizeof(cd)); cd.cd_Rom = z->rom; - cd.cd_SlotAddr = z->slotaddr; - cd.cd_SlotSize = z->slotsize; - cd.cd_BoardAddr = (void *)zorro_resource_start(z); - cd.cd_BoardSize = zorro_resource_len(z); + cd.cd_SlotAddr = cpu_to_be16(z->slotaddr); + cd.cd_SlotSize = cpu_to_be16(z->slotsize); + cd.cd_BoardAddr = cpu_to_be32(zorro_resource_start(z)); + cd.cd_BoardSize = cpu_to_be32(zorro_resource_len(z)); if (copy_to_user(buf, (void *)&cd + pos, nbytes)) return -EFAULT; diff --git a/drivers/zorro/zorro-sysfs.c b/drivers/zorro/zorro-sysfs.c index 26f7184ef9e1..36b210f9b6b2 100644 --- a/drivers/zorro/zorro-sysfs.c +++ b/drivers/zorro/zorro-sysfs.c @@ -16,6 +16,8 @@ #include #include +#include + #include "zorro.h" @@ -33,10 +35,20 @@ static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL); zorro_config_attr(id, id, "0x%08x\n"); zorro_config_attr(type, rom.er_Type, "0x%02x\n"); -zorro_config_attr(serial, rom.er_SerialNumber, "0x%08x\n"); zorro_config_attr(slotaddr, slotaddr, "0x%04x\n"); zorro_config_attr(slotsize, slotsize, "0x%04x\n"); +static ssize_t +show_serial(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct zorro_dev *z; + + z = to_zorro_dev(dev); + return sprintf(buf, "0x%08x\n", be32_to_cpu(z->rom.er_SerialNumber)); +} + +static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL); + static ssize_t zorro_show_resource(struct device *dev, struct device_attribute *attr, char *buf) { struct zorro_dev *z = to_zorro_dev(dev); @@ -60,10 +72,10 @@ static ssize_t zorro_read_config(struct file *filp, struct kobject *kobj, /* Construct a ConfigDev */ memset(&cd, 0, sizeof(cd)); cd.cd_Rom = z->rom; - cd.cd_SlotAddr = z->slotaddr; - cd.cd_SlotSize = z->slotsize; - cd.cd_BoardAddr = (void *)zorro_resource_start(z); - cd.cd_BoardSize = zorro_resource_len(z); + cd.cd_SlotAddr = cpu_to_be16(z->slotaddr); + cd.cd_SlotSize = cpu_to_be16(z->slotsize); + cd.cd_BoardAddr = cpu_to_be32(zorro_resource_start(z)); + cd.cd_BoardSize = cpu_to_be32(zorro_resource_len(z)); return memory_read_from_buffer(buf, count, &off, &cd, sizeof(cd)); } diff --git a/drivers/zorro/zorro.c b/drivers/zorro/zorro.c index 450abf100f06..707c1a5a0317 100644 --- a/drivers/zorro/zorro.c +++ b/drivers/zorro/zorro.c @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -161,7 +162,8 @@ static int __init amiga_zorro_probe(struct platform_device *pdev) z = &zorro_autocon[i]; z->rom = zi->rom; - z->id = (z->rom.er_Manufacturer<<16) | (z->rom.er_Product<<8); + z->id = (be16_to_cpu(z->rom.er_Manufacturer) << 16) | + (z->rom.er_Product << 8); if (z->id == ZORRO_PROD_GVP_EPC_BASE) { /* GVP quirk */ unsigned long magic = zi->boardaddr + 0x8000; diff --git a/include/uapi/linux/zorro.h b/include/uapi/linux/zorro.h index 36a033e23d31..59d021b242ed 100644 --- a/include/uapi/linux/zorro.h +++ b/include/uapi/linux/zorro.h @@ -63,26 +63,26 @@ enum GVP_flags { struct Node { - struct Node *ln_Succ; /* Pointer to next (successor) */ - struct Node *ln_Pred; /* Pointer to previous (predecessor) */ - __u8 ln_Type; - __s8 ln_Pri; /* Priority, for sorting */ - __s8 *ln_Name; /* ID string, null terminated */ + __be32 ln_Succ; /* Pointer to next (successor) */ + __be32 ln_Pred; /* Pointer to previous (predecessor) */ + __u8 ln_Type; + __s8 ln_Pri; /* Priority, for sorting */ + __be32 ln_Name; /* ID string, null terminated */ } __packed; struct ExpansionRom { /* -First 16 bytes of the expansion ROM */ - __u8 er_Type; /* Board type, size and flags */ - __u8 er_Product; /* Product number, assigned by manufacturer */ - __u8 er_Flags; /* Flags */ - __u8 er_Reserved03; /* Must be zero ($ff inverted) */ - __u16 er_Manufacturer; /* Unique ID, ASSIGNED BY COMMODORE-AMIGA! */ - __u32 er_SerialNumber; /* Available for use by manufacturer */ - __u16 er_InitDiagVec; /* Offset to optional "DiagArea" structure */ - __u8 er_Reserved0c; - __u8 er_Reserved0d; - __u8 er_Reserved0e; - __u8 er_Reserved0f; + __u8 er_Type; /* Board type, size and flags */ + __u8 er_Product; /* Product number, assigned by manufacturer */ + __u8 er_Flags; /* Flags */ + __u8 er_Reserved03; /* Must be zero ($ff inverted) */ + __be16 er_Manufacturer; /* Unique ID, ASSIGNED BY COMMODORE-AMIGA! */ + __be32 er_SerialNumber; /* Available for use by manufacturer */ + __be16 er_InitDiagVec; /* Offset to optional "DiagArea" structure */ + __u8 er_Reserved0c; + __u8 er_Reserved0d; + __u8 er_Reserved0e; + __u8 er_Reserved0f; } __packed; /* er_Type board type bits */ @@ -99,13 +99,13 @@ struct ConfigDev { __u8 cd_Flags; /* (read/write) */ __u8 cd_Pad; /* reserved */ struct ExpansionRom cd_Rom; /* copy of board's expansion ROM */ - void *cd_BoardAddr; /* where in memory the board was placed */ - __u32 cd_BoardSize; /* size of board in bytes */ - __u16 cd_SlotAddr; /* which slot number (PRIVATE) */ - __u16 cd_SlotSize; /* number of slots (PRIVATE) */ - void *cd_Driver; /* pointer to node of driver */ - struct ConfigDev *cd_NextCD; /* linked list of drivers to config */ - __u32 cd_Unused[4]; /* for whatever the driver wants */ + __be32 cd_BoardAddr; /* where in memory the board was placed */ + __be32 cd_BoardSize; /* size of board in bytes */ + __be16 cd_SlotAddr; /* which slot number (PRIVATE) */ + __be16 cd_SlotSize; /* number of slots (PRIVATE) */ + __be32 cd_Driver; /* pointer to node of driver */ + __be32 cd_NextCD; /* linked list of drivers to config */ + __be32 cd_Unused[4]; /* for whatever the driver wants */ } __packed; #define ZORRO_NUM_AUTO 16 From b3ce71720560a9125aa0343bdbf501d887dbfb74 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 4 Oct 2013 12:38:23 +0200 Subject: [PATCH 051/981] block/z2ram: Remove duplicate external declarations Remove the external declarations for m68k_realnum_memory and m68k_memory, which are already provided by , to avoid conflicts later. Signed-off-by: Geert Uytterhoeven --- drivers/block/z2ram.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c index 8b2a60cee3a0..27de5046708a 100644 --- a/drivers/block/z2ram.c +++ b/drivers/block/z2ram.c @@ -43,9 +43,6 @@ #include -extern int m68k_realnum_memory; -extern struct mem_info m68k_memory[NUM_MEMINFO]; - #define Z2MINOR_COMBINED (0) #define Z2MINOR_Z2ONLY (1) #define Z2MINOR_CHIPONLY (2) From 29a202035753dd0e5810caaefe885ed8934bfd46 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 9 Sep 2013 09:00:50 +0200 Subject: [PATCH 052/981] m68k: The bootinfo is located right after the kernel Since the introduction of init sections (which are located after BSS), the bootinfo is no longer located right after the BSS, but after all kernel sections. Signed-off-by: Geert Uytterhoeven --- arch/m68k/include/asm/bootinfo.h | 2 +- arch/m68k/kernel/head.S | 2 +- arch/m68k/kernel/setup_mm.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/m68k/include/asm/bootinfo.h b/arch/m68k/include/asm/bootinfo.h index 67e7a78ad96b..fbb6150f00f3 100644 --- a/arch/m68k/include/asm/bootinfo.h +++ b/arch/m68k/include/asm/bootinfo.h @@ -34,7 +34,7 @@ * This way I hope to keep all future changes back/forewards compatible. * Thus, keep your fingers crossed... * - * This structure is copied right after the kernel bss by the bootstrap + * This structure is copied right after the kernel by the bootstrap * routine. */ diff --git a/arch/m68k/kernel/head.S b/arch/m68k/kernel/head.S index ac85f16534af..fb348caa734e 100644 --- a/arch/m68k/kernel/head.S +++ b/arch/m68k/kernel/head.S @@ -1532,7 +1532,7 @@ L(cache_done): /* * Find a tag record in the bootinfo structure - * The bootinfo structure is located right after the kernel bss + * The bootinfo structure is located right after the kernel * Returns: d0: size (-1 if not found) * a0: data pointer (end-of-records if not found) */ diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c index 9f16a15fa287..18423a843379 100644 --- a/arch/m68k/kernel/setup_mm.c +++ b/arch/m68k/kernel/setup_mm.c @@ -220,7 +220,7 @@ void __init setup_arch(char **cmdline_p) int i; #endif - /* The bootinfo is located right after the kernel bss */ + /* The bootinfo is located right after the kernel */ if (!CPU_IS_COLDFIRE) m68k_parse_bootinfo((const struct bi_record *)_end); From 7ca1e52dc816e2c1f6663d9d114f25f0b22f98f1 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 14 Oct 2013 11:45:07 +0200 Subject: [PATCH 053/981] m68k: head.S - Correct date and spelling Signed-off-by: Geert Uytterhoeven --- arch/m68k/kernel/head.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/m68k/kernel/head.S b/arch/m68k/kernel/head.S index fb348caa734e..8d14b0f9a36f 100644 --- a/arch/m68k/kernel/head.S +++ b/arch/m68k/kernel/head.S @@ -23,7 +23,7 @@ ** 98/04/25 Phil Blundell: added HP300 support ** 1998/08/30 David Kilzer: Added support for font_desc structures ** for linux-2.1.115 -** 9/02/11 Richard Zidlicky: added Q40 support (initial vesion 99/01/01) +** 1999/02/11 Richard Zidlicky: added Q40 support (initial version 99/01/01) ** 2004/05/13 Kars de Jong: Finalised HP300 support ** ** This file is subject to the terms and conditions of the GNU General Public From 958903d6f08c977778bf5e1f18eb7fd27b30ca72 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 1 Oct 2013 20:42:32 +0200 Subject: [PATCH 054/981] m68k: Drop remainings and API of BOOTINFO_COMPAT_1_0 Drop remainings and API for backwards compatibility with bootinfo interface version 1.0. This was used when booting a 2.1.x or newer kernel on Amiga, Atari, or Mac using a bootstrap for kernel 2.0.x. Everybody upgraded his bootstrap a long time ago, so this can go. Signed-off-by: Geert Uytterhoeven --- arch/m68k/include/asm/bootinfo.h | 112 ------------------------------- arch/m68k/mac/config.c | 1 - arch/m68k/mac/misc.c | 1 - 3 files changed, 114 deletions(-) diff --git a/arch/m68k/include/asm/bootinfo.h b/arch/m68k/include/asm/bootinfo.h index fbb6150f00f3..44f97beb5215 100644 --- a/arch/m68k/include/asm/bootinfo.h +++ b/arch/m68k/include/asm/bootinfo.h @@ -263,116 +263,4 @@ struct bootversion { #define Q40_BOOTI_VERSION MK_BI_VERSION( 2, 0 ) #define HP300_BOOTI_VERSION MK_BI_VERSION( 2, 0 ) -#ifdef BOOTINFO_COMPAT_1_0 - - /* - * Backwards compatibility with bootinfo interface version 1.0 - */ - -#define COMPAT_AMIGA_BOOTI_VERSION MK_BI_VERSION( 1, 0 ) -#define COMPAT_ATARI_BOOTI_VERSION MK_BI_VERSION( 1, 0 ) -#define COMPAT_MAC_BOOTI_VERSION MK_BI_VERSION( 1, 0 ) - -#include - -#define COMPAT_NUM_AUTO 16 - -struct compat_bi_Amiga { - int model; - int num_autocon; - struct ConfigDev autocon[COMPAT_NUM_AUTO]; - unsigned long chip_size; - unsigned char vblank; - unsigned char psfreq; - unsigned long eclock; - unsigned long chipset; - unsigned long hw_present; -}; - -struct compat_bi_Atari { - unsigned long hw_present; - unsigned long mch_cookie; -}; - -#ifndef __ASSEMBLY__ - -struct compat_bi_Macintosh -{ - unsigned long videoaddr; - unsigned long videorow; - unsigned long videodepth; - unsigned long dimensions; - unsigned long args; - unsigned long boottime; - unsigned long gmtbias; - unsigned long bootver; - unsigned long videological; - unsigned long sccbase; - unsigned long id; - unsigned long memsize; - unsigned long serialmf; - unsigned long serialhsk; - unsigned long serialgpi; - unsigned long printmf; - unsigned long printhsk; - unsigned long printgpi; - unsigned long cpuid; - unsigned long rombase; - unsigned long adbdelay; - unsigned long timedbra; -}; - -#endif - -struct compat_mem_info { - unsigned long addr; - unsigned long size; -}; - -#define COMPAT_NUM_MEMINFO 4 - -#define COMPAT_CPUB_68020 0 -#define COMPAT_CPUB_68030 1 -#define COMPAT_CPUB_68040 2 -#define COMPAT_CPUB_68060 3 -#define COMPAT_FPUB_68881 5 -#define COMPAT_FPUB_68882 6 -#define COMPAT_FPUB_68040 7 -#define COMPAT_FPUB_68060 8 - -#define COMPAT_CPU_68020 (1< #include -#define BOOTINFO_COMPAT_1_0 #include #include diff --git a/arch/m68k/mac/misc.c b/arch/m68k/mac/misc.c index 5e085554ac7f..3d02a3c3a93a 100644 --- a/arch/m68k/mac/misc.c +++ b/arch/m68k/mac/misc.c @@ -25,7 +25,6 @@ #include #include -#define BOOTINFO_COMPAT_1_0 #include #include From bdd47c9fc1d96d3319e938931c83b863b5331b57 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 2 Oct 2013 10:01:43 +0200 Subject: [PATCH 055/981] m68k/mac: Move struct mac_booter_data to struct mac_booter_data is no longer part of the bootinfo API, hence move it from to , dropping all unused fields in the process. Also remove the no longer used mac_booter_data pointer from head.S. Signed-off-by: Geert Uytterhoeven --- arch/m68k/include/asm/bootinfo.h | 39 ------------------------------- arch/m68k/include/asm/macintosh.h | 23 ++++++++++++++++++ arch/m68k/kernel/head.S | 2 -- 3 files changed, 23 insertions(+), 41 deletions(-) diff --git a/arch/m68k/include/asm/bootinfo.h b/arch/m68k/include/asm/bootinfo.h index 44f97beb5215..908c42f91e6a 100644 --- a/arch/m68k/include/asm/bootinfo.h +++ b/arch/m68k/include/asm/bootinfo.h @@ -167,45 +167,6 @@ struct bi_record { #define BI_MAC_IOP_SWIM 0x8020 /* Mac SWIM floppy IOP */ #define BI_MAC_IOP_ADB 0x8021 /* Mac ADB IOP */ - /* - * Mac: compatibility with old booter data format (temporarily) - * Fields unused with the new bootinfo can be deleted now; instead of - * adding new fields the struct might be splitted into a hardware address - * part and a hardware type part - */ - -#ifndef __ASSEMBLY__ - -struct mac_booter_data -{ - unsigned long videoaddr; - unsigned long videorow; - unsigned long videodepth; - unsigned long dimensions; - unsigned long args; - unsigned long boottime; - unsigned long gmtbias; - unsigned long bootver; - unsigned long videological; - unsigned long sccbase; - unsigned long id; - unsigned long memsize; - unsigned long serialmf; - unsigned long serialhsk; - unsigned long serialgpi; - unsigned long printmf; - unsigned long printhsk; - unsigned long printgpi; - unsigned long cpuid; - unsigned long rombase; - unsigned long adbdelay; - unsigned long timedbra; -}; - -extern struct mac_booter_data - mac_bi_data; - -#endif /* * Apollo-specific tags diff --git a/arch/m68k/include/asm/macintosh.h b/arch/m68k/include/asm/macintosh.h index 682a1a2ff55f..6a633eb64331 100644 --- a/arch/m68k/include/asm/macintosh.h +++ b/arch/m68k/include/asm/macintosh.h @@ -135,4 +135,27 @@ struct mac_model extern struct mac_model *macintosh_config; + + /* + * Internal representation of the Mac hardware, filled in from bootinfo + */ + +struct mac_booter_data +{ + unsigned long videoaddr; + unsigned long videorow; + unsigned long videodepth; + unsigned long dimensions; + unsigned long boottime; + unsigned long gmtbias; + unsigned long videological; + unsigned long sccbase; + unsigned long id; + unsigned long memsize; + unsigned long cpuid; + unsigned long rombase; +}; + +extern struct mac_booter_data mac_bi_data; + #endif diff --git a/arch/m68k/kernel/head.S b/arch/m68k/kernel/head.S index 8d14b0f9a36f..28f817481f3b 100644 --- a/arch/m68k/kernel/head.S +++ b/arch/m68k/kernel/head.S @@ -3896,8 +3896,6 @@ BVME_SCC_DATA_A = 0xffb0000f #endif #if defined(CONFIG_MAC) -L(mac_booter_data): - .long 0 L(mac_videobase): .long 0 L(mac_videodepth): From 4edf07fd8f6520bea292384a3638088ccbfdcaa1 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 2 Oct 2013 22:28:08 +0200 Subject: [PATCH 056/981] m68k/vme: Remove unused mvme_bdid_ptr Signed-off-by: Geert Uytterhoeven --- arch/m68k/include/asm/mvme16xhw.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/m68k/include/asm/mvme16xhw.h b/arch/m68k/include/asm/mvme16xhw.h index 6117f56653d2..87fa84ad252e 100644 --- a/arch/m68k/include/asm/mvme16xhw.h +++ b/arch/m68k/include/asm/mvme16xhw.h @@ -7,8 +7,6 @@ /* Note, bytes 12 and 13 are board no in BCD (0162,0166,0167,0177,etc) */ -extern long mvme_bdid_ptr; - typedef struct { char bdid[4]; u_char rev, mth, day, yr; From 4c3c522bcebe16a717d7a809fd14b11823794027 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 2 Oct 2013 11:37:33 +0200 Subject: [PATCH 057/981] m68k/UAPI: Disintegrate arch/m68k/include/asm/bootinfo.h Export the bootinfo definitions that are used by bootstrap loaders, and split them up in generic and platform-specific parts. Signed-off-by: Geert Uytterhoeven --- arch/m68k/amiga/config.c | 1 + arch/m68k/apollo/config.c | 1 + arch/m68k/atari/config.c | 1 + arch/m68k/bvme6000/config.c | 1 + arch/m68k/hp300/config.c | 1 + arch/m68k/include/asm/atarihw.h | 2 +- arch/m68k/include/asm/bootinfo.h | 213 +------------------ arch/m68k/include/uapi/asm/Kbuild | 8 + arch/m68k/include/uapi/asm/bootinfo-amiga.h | 31 +++ arch/m68k/include/uapi/asm/bootinfo-apollo.h | 16 ++ arch/m68k/include/uapi/asm/bootinfo-atari.h | 44 ++++ arch/m68k/include/uapi/asm/bootinfo-hp300.h | 25 +++ arch/m68k/include/uapi/asm/bootinfo-mac.h | 59 +++++ arch/m68k/include/uapi/asm/bootinfo-q40.h | 16 ++ arch/m68k/include/uapi/asm/bootinfo-vme.h | 46 ++++ arch/m68k/include/uapi/asm/bootinfo.h | 95 +++++++++ arch/m68k/kernel/head.S | 6 + arch/m68k/mac/config.c | 1 + arch/m68k/mvme147/config.c | 1 + arch/m68k/mvme16x/config.c | 1 + 20 files changed, 356 insertions(+), 213 deletions(-) create mode 100644 arch/m68k/include/uapi/asm/bootinfo-amiga.h create mode 100644 arch/m68k/include/uapi/asm/bootinfo-apollo.h create mode 100644 arch/m68k/include/uapi/asm/bootinfo-atari.h create mode 100644 arch/m68k/include/uapi/asm/bootinfo-hp300.h create mode 100644 arch/m68k/include/uapi/asm/bootinfo-mac.h create mode 100644 arch/m68k/include/uapi/asm/bootinfo-q40.h create mode 100644 arch/m68k/include/uapi/asm/bootinfo-vme.h create mode 100644 arch/m68k/include/uapi/asm/bootinfo.h diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c index 65b5e937ebba..0c92c1baf9bf 100644 --- a/arch/m68k/amiga/config.c +++ b/arch/m68k/amiga/config.c @@ -28,6 +28,7 @@ #include #include +#include #include #include #include diff --git a/arch/m68k/apollo/config.c b/arch/m68k/apollo/config.c index 2bdde9685a29..c90c19e904bc 100644 --- a/arch/m68k/apollo/config.c +++ b/arch/m68k/apollo/config.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/arch/m68k/atari/config.c b/arch/m68k/atari/config.c index fb2d0bd9b3ad..9159195505d5 100644 --- a/arch/m68k/atari/config.c +++ b/arch/m68k/atari/config.c @@ -37,6 +37,7 @@ #include #include +#include #include #include #include diff --git a/arch/m68k/bvme6000/config.c b/arch/m68k/bvme6000/config.c index 1b9e31e0130f..4d1b403822fa 100644 --- a/arch/m68k/bvme6000/config.c +++ b/arch/m68k/bvme6000/config.c @@ -28,6 +28,7 @@ #include #include +#include #include #include #include diff --git a/arch/m68k/hp300/config.c b/arch/m68k/hp300/config.c index b7609f791522..892d0794fc0b 100644 --- a/arch/m68k/hp300/config.c +++ b/arch/m68k/hp300/config.c @@ -14,6 +14,7 @@ #include #include +#include #include #include #include /* readb() and writeb() */ diff --git a/arch/m68k/include/asm/atarihw.h b/arch/m68k/include/asm/atarihw.h index d887050e6da6..972c8f33f055 100644 --- a/arch/m68k/include/asm/atarihw.h +++ b/arch/m68k/include/asm/atarihw.h @@ -21,7 +21,7 @@ #define _LINUX_ATARIHW_H_ #include -#include +#include #include extern u_long atari_mch_cookie; diff --git a/arch/m68k/include/asm/bootinfo.h b/arch/m68k/include/asm/bootinfo.h index 908c42f91e6a..9edc31893fb8 100644 --- a/arch/m68k/include/asm/bootinfo.h +++ b/arch/m68k/include/asm/bootinfo.h @@ -6,222 +6,11 @@ ** This file is subject to the terms and conditions of the GNU General Public ** License. See the file COPYING in the main directory of this archive ** for more details. -** -** Created 09/29/92 by Greg Harp -** -** 5/2/94 Roman Hodek: -** Added bi_atari part of the machine dependent union bi_un; for now it -** contains just a model field to distinguish between TT and Falcon. -** 26/7/96 Roman Zippel: -** Renamed to setup.h; added some useful macros to allow gcc some -** optimizations if possible. -** 5/10/96 Geert Uytterhoeven: -** Redesign of the boot information structure; renamed to bootinfo.h again -** 27/11/96 Geert Uytterhoeven: -** Backwards compatibility with bootinfo interface version 1.0 */ #ifndef _M68K_BOOTINFO_H #define _M68K_BOOTINFO_H - - /* - * Bootinfo definitions - * - * This is an easily parsable and extendable structure containing all - * information to be passed from the bootstrap to the kernel. - * - * This way I hope to keep all future changes back/forewards compatible. - * Thus, keep your fingers crossed... - * - * This structure is copied right after the kernel by the bootstrap - * routine. - */ - -#ifndef __ASSEMBLY__ - -struct bi_record { - unsigned short tag; /* tag ID */ - unsigned short size; /* size of record (in bytes) */ - unsigned long data[0]; /* data */ -}; - -#endif /* __ASSEMBLY__ */ - - - /* - * Tag Definitions - * - * Machine independent tags start counting from 0x0000 - * Machine dependent tags start counting from 0x8000 - */ - -#define BI_LAST 0x0000 /* last record (sentinel) */ -#define BI_MACHTYPE 0x0001 /* machine type (u_long) */ -#define BI_CPUTYPE 0x0002 /* cpu type (u_long) */ -#define BI_FPUTYPE 0x0003 /* fpu type (u_long) */ -#define BI_MMUTYPE 0x0004 /* mmu type (u_long) */ -#define BI_MEMCHUNK 0x0005 /* memory chunk address and size */ - /* (struct mem_info) */ -#define BI_RAMDISK 0x0006 /* ramdisk address and size */ - /* (struct mem_info) */ -#define BI_COMMAND_LINE 0x0007 /* kernel command line parameters */ - /* (string) */ - - /* - * Amiga-specific tags - */ - -#define BI_AMIGA_MODEL 0x8000 /* model (u_long) */ -#define BI_AMIGA_AUTOCON 0x8001 /* AutoConfig device */ - /* (struct ConfigDev) */ -#define BI_AMIGA_CHIP_SIZE 0x8002 /* size of Chip RAM (u_long) */ -#define BI_AMIGA_VBLANK 0x8003 /* VBLANK frequency (u_char) */ -#define BI_AMIGA_PSFREQ 0x8004 /* power supply frequency (u_char) */ -#define BI_AMIGA_ECLOCK 0x8005 /* EClock frequency (u_long) */ -#define BI_AMIGA_CHIPSET 0x8006 /* native chipset present (u_long) */ -#define BI_AMIGA_SERPER 0x8007 /* serial port period (u_short) */ - - /* - * Atari-specific tags - */ - -#define BI_ATARI_MCH_COOKIE 0x8000 /* _MCH cookie from TOS (u_long) */ -#define BI_ATARI_MCH_TYPE 0x8001 /* special machine type (u_long) */ - /* (values are ATARI_MACH_* defines */ - -/* mch_cookie values (upper word) */ -#define ATARI_MCH_ST 0 -#define ATARI_MCH_STE 1 -#define ATARI_MCH_TT 2 -#define ATARI_MCH_FALCON 3 - -/* mch_type values */ -#define ATARI_MACH_NORMAL 0 /* no special machine type */ -#define ATARI_MACH_MEDUSA 1 /* Medusa 040 */ -#define ATARI_MACH_HADES 2 /* Hades 040 or 060 */ -#define ATARI_MACH_AB40 3 /* Afterburner040 on Falcon */ - - /* - * VME-specific tags - */ - -#define BI_VME_TYPE 0x8000 /* VME sub-architecture (u_long) */ -#define BI_VME_BRDINFO 0x8001 /* VME board information (struct) */ - -/* BI_VME_TYPE codes */ -#define VME_TYPE_TP34V 0x0034 /* Tadpole TP34V */ -#define VME_TYPE_MVME147 0x0147 /* Motorola MVME147 */ -#define VME_TYPE_MVME162 0x0162 /* Motorola MVME162 */ -#define VME_TYPE_MVME166 0x0166 /* Motorola MVME166 */ -#define VME_TYPE_MVME167 0x0167 /* Motorola MVME167 */ -#define VME_TYPE_MVME172 0x0172 /* Motorola MVME172 */ -#define VME_TYPE_MVME177 0x0177 /* Motorola MVME177 */ -#define VME_TYPE_BVME4000 0x4000 /* BVM Ltd. BVME4000 */ -#define VME_TYPE_BVME6000 0x6000 /* BVM Ltd. BVME6000 */ - -/* BI_VME_BRDINFO is a 32 byte struct as returned by the Bug code on - * Motorola VME boards. Contains board number, Bug version, board - * configuration options, etc. See include/asm/mvme16xhw.h for details. - */ - - - /* - * Macintosh-specific tags (all u_long) - */ - -#define BI_MAC_MODEL 0x8000 /* Mac Gestalt ID (model type) */ -#define BI_MAC_VADDR 0x8001 /* Mac video base address */ -#define BI_MAC_VDEPTH 0x8002 /* Mac video depth */ -#define BI_MAC_VROW 0x8003 /* Mac video rowbytes */ -#define BI_MAC_VDIM 0x8004 /* Mac video dimensions */ -#define BI_MAC_VLOGICAL 0x8005 /* Mac video logical base */ -#define BI_MAC_SCCBASE 0x8006 /* Mac SCC base address */ -#define BI_MAC_BTIME 0x8007 /* Mac boot time */ -#define BI_MAC_GMTBIAS 0x8008 /* Mac GMT timezone offset */ -#define BI_MAC_MEMSIZE 0x8009 /* Mac RAM size (sanity check) */ -#define BI_MAC_CPUID 0x800a /* Mac CPU type (sanity check) */ -#define BI_MAC_ROMBASE 0x800b /* Mac system ROM base address */ - - /* - * Macintosh hardware profile data - unused, see macintosh.h for - * reasonable type values - */ - -#define BI_MAC_VIA1BASE 0x8010 /* Mac VIA1 base address (always present) */ -#define BI_MAC_VIA2BASE 0x8011 /* Mac VIA2 base address (type varies) */ -#define BI_MAC_VIA2TYPE 0x8012 /* Mac VIA2 type (VIA, RBV, OSS) */ -#define BI_MAC_ADBTYPE 0x8013 /* Mac ADB interface type */ -#define BI_MAC_ASCBASE 0x8014 /* Mac Apple Sound Chip base address */ -#define BI_MAC_SCSI5380 0x8015 /* Mac NCR 5380 SCSI (base address, multi) */ -#define BI_MAC_SCSIDMA 0x8016 /* Mac SCSI DMA (base address) */ -#define BI_MAC_SCSI5396 0x8017 /* Mac NCR 53C96 SCSI (base address, multi) */ -#define BI_MAC_IDETYPE 0x8018 /* Mac IDE interface type */ -#define BI_MAC_IDEBASE 0x8019 /* Mac IDE interface base address */ -#define BI_MAC_NUBUS 0x801a /* Mac Nubus type (none, regular, pseudo) */ -#define BI_MAC_SLOTMASK 0x801b /* Mac Nubus slots present */ -#define BI_MAC_SCCTYPE 0x801c /* Mac SCC serial type (normal, IOP) */ -#define BI_MAC_ETHTYPE 0x801d /* Mac builtin ethernet type (Sonic, MACE */ -#define BI_MAC_ETHBASE 0x801e /* Mac builtin ethernet base address */ -#define BI_MAC_PMU 0x801f /* Mac power management / poweroff hardware */ -#define BI_MAC_IOP_SWIM 0x8020 /* Mac SWIM floppy IOP */ -#define BI_MAC_IOP_ADB 0x8021 /* Mac ADB IOP */ - - - /* - * Apollo-specific tags - */ - -#define BI_APOLLO_MODEL 0x8000 /* model (u_long) */ - - /* - * HP300-specific tags - */ - -#define BI_HP300_MODEL 0x8000 /* model (u_long) */ -#define BI_HP300_UART_SCODE 0x8001 /* UART select code (u_long) */ -#define BI_HP300_UART_ADDR 0x8002 /* phys. addr of UART (u_long) */ - - /* - * Stuff for bootinfo interface versioning - * - * At the start of kernel code, a 'struct bootversion' is located. - * bootstrap checks for a matching version of the interface before booting - * a kernel, to avoid user confusion if kernel and bootstrap don't work - * together :-) - * - * If incompatible changes are made to the bootinfo interface, the major - * number below should be stepped (and the minor reset to 0) for the - * appropriate machine. If a change is backward-compatible, the minor - * should be stepped. "Backwards-compatible" means that booting will work, - * but certain features may not. - */ - -#define BOOTINFOV_MAGIC 0x4249561A /* 'BIV^Z' */ -#define MK_BI_VERSION(major,minor) (((major)<<16)+(minor)) -#define BI_VERSION_MAJOR(v) (((v) >> 16) & 0xffff) -#define BI_VERSION_MINOR(v) ((v) & 0xffff) - -#ifndef __ASSEMBLY__ - -struct bootversion { - unsigned short branch; - unsigned long magic; - struct { - unsigned long machtype; - unsigned long version; - } machversions[0]; -}; - -#endif /* __ASSEMBLY__ */ - -#define AMIGA_BOOTI_VERSION MK_BI_VERSION( 2, 0 ) -#define ATARI_BOOTI_VERSION MK_BI_VERSION( 2, 1 ) -#define MAC_BOOTI_VERSION MK_BI_VERSION( 2, 0 ) -#define MVME147_BOOTI_VERSION MK_BI_VERSION( 2, 0 ) -#define MVME16x_BOOTI_VERSION MK_BI_VERSION( 2, 0 ) -#define BVME6000_BOOTI_VERSION MK_BI_VERSION( 2, 0 ) -#define Q40_BOOTI_VERSION MK_BI_VERSION( 2, 0 ) -#define HP300_BOOTI_VERSION MK_BI_VERSION( 2, 0 ) +#include #endif /* _M68K_BOOTINFO_H */ diff --git a/arch/m68k/include/uapi/asm/Kbuild b/arch/m68k/include/uapi/asm/Kbuild index 1fef45ada097..6a2d257bdfb2 100644 --- a/arch/m68k/include/uapi/asm/Kbuild +++ b/arch/m68k/include/uapi/asm/Kbuild @@ -11,6 +11,14 @@ generic-y += termbits.h generic-y += termios.h header-y += a.out.h +header-y += bootinfo.h +header-y += bootinfo-amiga.h +header-y += bootinfo-apollo.h +header-y += bootinfo-atari.h +header-y += bootinfo-hp300.h +header-y += bootinfo-mac.h +header-y += bootinfo-q40.h +header-y += bootinfo-vme.h header-y += byteorder.h header-y += cachectl.h header-y += fcntl.h diff --git a/arch/m68k/include/uapi/asm/bootinfo-amiga.h b/arch/m68k/include/uapi/asm/bootinfo-amiga.h new file mode 100644 index 000000000000..2c32d671f232 --- /dev/null +++ b/arch/m68k/include/uapi/asm/bootinfo-amiga.h @@ -0,0 +1,31 @@ +/* +** asm/bootinfo-amiga.h -- Amiga-specific boot information definitions +*/ + +#ifndef _UAPI_ASM_M68K_BOOTINFO_AMIGA_H +#define _UAPI_ASM_M68K_BOOTINFO_AMIGA_H + + + /* + * Amiga-specific tags + */ + +#define BI_AMIGA_MODEL 0x8000 /* model (u_long) */ +#define BI_AMIGA_AUTOCON 0x8001 /* AutoConfig device */ + /* (AmigaOS struct ConfigDev) */ +#define BI_AMIGA_CHIP_SIZE 0x8002 /* size of Chip RAM (u_long) */ +#define BI_AMIGA_VBLANK 0x8003 /* VBLANK frequency (u_char) */ +#define BI_AMIGA_PSFREQ 0x8004 /* power supply frequency (u_char) */ +#define BI_AMIGA_ECLOCK 0x8005 /* EClock frequency (u_long) */ +#define BI_AMIGA_CHIPSET 0x8006 /* native chipset present (u_long) */ +#define BI_AMIGA_SERPER 0x8007 /* serial port period (u_short) */ + + + /* + * Latest Amiga bootinfo version + */ + +#define AMIGA_BOOTI_VERSION MK_BI_VERSION(2, 0) + + +#endif /* _UAPI_ASM_M68K_BOOTINFO_AMIGA_H */ diff --git a/arch/m68k/include/uapi/asm/bootinfo-apollo.h b/arch/m68k/include/uapi/asm/bootinfo-apollo.h new file mode 100644 index 000000000000..3a2051e822b0 --- /dev/null +++ b/arch/m68k/include/uapi/asm/bootinfo-apollo.h @@ -0,0 +1,16 @@ +/* +** asm/bootinfo-apollo.h -- Apollo-specific boot information definitions +*/ + +#ifndef _UAPI_ASM_M68K_BOOTINFO_APOLLO_H +#define _UAPI_ASM_M68K_BOOTINFO_APOLLO_H + + + /* + * Apollo-specific tags + */ + +#define BI_APOLLO_MODEL 0x8000 /* model (u_long) */ + + +#endif /* _UAPI_ASM_M68K_BOOTINFO_APOLLO_H */ diff --git a/arch/m68k/include/uapi/asm/bootinfo-atari.h b/arch/m68k/include/uapi/asm/bootinfo-atari.h new file mode 100644 index 000000000000..cca0a83fc0e5 --- /dev/null +++ b/arch/m68k/include/uapi/asm/bootinfo-atari.h @@ -0,0 +1,44 @@ +/* +** asm/bootinfo-atari.h -- Atari-specific boot information definitions +*/ + +#ifndef _UAPI_ASM_M68K_BOOTINFO_ATARI_H +#define _UAPI_ASM_M68K_BOOTINFO_ATARI_H + + + /* + * Atari-specific tags + */ + +#define BI_ATARI_MCH_COOKIE 0x8000 /* _MCH cookie from TOS (u_long) */ +#define BI_ATARI_MCH_TYPE 0x8001 /* special machine type (u_long) */ + + + /* + * mch_cookie values (upper word of BI_ATARI_MCH_COOKIE) + */ + +#define ATARI_MCH_ST 0 +#define ATARI_MCH_STE 1 +#define ATARI_MCH_TT 2 +#define ATARI_MCH_FALCON 3 + + + /* + * Atari machine types (BI_ATARI_MCH_TYPE) + */ + +#define ATARI_MACH_NORMAL 0 /* no special machine type */ +#define ATARI_MACH_MEDUSA 1 /* Medusa 040 */ +#define ATARI_MACH_HADES 2 /* Hades 040 or 060 */ +#define ATARI_MACH_AB40 3 /* Afterburner040 on Falcon */ + + + /* + * Latest Atari bootinfo version + */ + +#define ATARI_BOOTI_VERSION MK_BI_VERSION(2, 1) + + +#endif /* _UAPI_ASM_M68K_BOOTINFO_ATARI_H */ diff --git a/arch/m68k/include/uapi/asm/bootinfo-hp300.h b/arch/m68k/include/uapi/asm/bootinfo-hp300.h new file mode 100644 index 000000000000..e7ed19cdafe5 --- /dev/null +++ b/arch/m68k/include/uapi/asm/bootinfo-hp300.h @@ -0,0 +1,25 @@ +/* +** asm/bootinfo-hp300.h -- HP9000/300-specific boot information definitions +*/ + +#ifndef _UAPI_ASM_M68K_BOOTINFO_HP300_H +#define _UAPI_ASM_M68K_BOOTINFO_HP300_H + + + /* + * HP9000/300-specific tags + */ + +#define BI_HP300_MODEL 0x8000 /* model (u_long) */ +#define BI_HP300_UART_SCODE 0x8001 /* UART select code (u_long) */ +#define BI_HP300_UART_ADDR 0x8002 /* phys. addr of UART (u_long) */ + + + /* + * Latest HP9000/300 bootinfo version + */ + +#define HP300_BOOTI_VERSION MK_BI_VERSION(2, 0) + + +#endif /* _UAPI_ASM_M68K_BOOTINFO_HP300_H */ diff --git a/arch/m68k/include/uapi/asm/bootinfo-mac.h b/arch/m68k/include/uapi/asm/bootinfo-mac.h new file mode 100644 index 000000000000..d4ef2115c8f9 --- /dev/null +++ b/arch/m68k/include/uapi/asm/bootinfo-mac.h @@ -0,0 +1,59 @@ +/* +** asm/bootinfo-mac.h -- Macintosh-specific boot information definitions +*/ + +#ifndef _UAPI_ASM_M68K_BOOTINFO_MAC_H +#define _UAPI_ASM_M68K_BOOTINFO_MAC_H + + + /* + * Macintosh-specific tags (all u_long) + */ + +#define BI_MAC_MODEL 0x8000 /* Mac Gestalt ID (model type) */ +#define BI_MAC_VADDR 0x8001 /* Mac video base address */ +#define BI_MAC_VDEPTH 0x8002 /* Mac video depth */ +#define BI_MAC_VROW 0x8003 /* Mac video rowbytes */ +#define BI_MAC_VDIM 0x8004 /* Mac video dimensions */ +#define BI_MAC_VLOGICAL 0x8005 /* Mac video logical base */ +#define BI_MAC_SCCBASE 0x8006 /* Mac SCC base address */ +#define BI_MAC_BTIME 0x8007 /* Mac boot time */ +#define BI_MAC_GMTBIAS 0x8008 /* Mac GMT timezone offset */ +#define BI_MAC_MEMSIZE 0x8009 /* Mac RAM size (sanity check) */ +#define BI_MAC_CPUID 0x800a /* Mac CPU type (sanity check) */ +#define BI_MAC_ROMBASE 0x800b /* Mac system ROM base address */ + + + /* + * Macintosh hardware profile data - unused, see macintosh.h for + * reasonable type values + */ + +#define BI_MAC_VIA1BASE 0x8010 /* Mac VIA1 base address (always present) */ +#define BI_MAC_VIA2BASE 0x8011 /* Mac VIA2 base address (type varies) */ +#define BI_MAC_VIA2TYPE 0x8012 /* Mac VIA2 type (VIA, RBV, OSS) */ +#define BI_MAC_ADBTYPE 0x8013 /* Mac ADB interface type */ +#define BI_MAC_ASCBASE 0x8014 /* Mac Apple Sound Chip base address */ +#define BI_MAC_SCSI5380 0x8015 /* Mac NCR 5380 SCSI (base address, multi) */ +#define BI_MAC_SCSIDMA 0x8016 /* Mac SCSI DMA (base address) */ +#define BI_MAC_SCSI5396 0x8017 /* Mac NCR 53C96 SCSI (base address, multi) */ +#define BI_MAC_IDETYPE 0x8018 /* Mac IDE interface type */ +#define BI_MAC_IDEBASE 0x8019 /* Mac IDE interface base address */ +#define BI_MAC_NUBUS 0x801a /* Mac Nubus type (none, regular, pseudo) */ +#define BI_MAC_SLOTMASK 0x801b /* Mac Nubus slots present */ +#define BI_MAC_SCCTYPE 0x801c /* Mac SCC serial type (normal, IOP) */ +#define BI_MAC_ETHTYPE 0x801d /* Mac builtin ethernet type (Sonic, MACE */ +#define BI_MAC_ETHBASE 0x801e /* Mac builtin ethernet base address */ +#define BI_MAC_PMU 0x801f /* Mac power management / poweroff hardware */ +#define BI_MAC_IOP_SWIM 0x8020 /* Mac SWIM floppy IOP */ +#define BI_MAC_IOP_ADB 0x8021 /* Mac ADB IOP */ + + + /* + * Latest Macintosh bootinfo version + */ + +#define MAC_BOOTI_VERSION MK_BI_VERSION(2, 0) + + +#endif /* _UAPI_ASM_M68K_BOOTINFO_MAC_H */ diff --git a/arch/m68k/include/uapi/asm/bootinfo-q40.h b/arch/m68k/include/uapi/asm/bootinfo-q40.h new file mode 100644 index 000000000000..c79fea7e555b --- /dev/null +++ b/arch/m68k/include/uapi/asm/bootinfo-q40.h @@ -0,0 +1,16 @@ +/* +** asm/bootinfo-q40.h -- Q40-specific boot information definitions +*/ + +#ifndef _UAPI_ASM_M68K_BOOTINFO_Q40_H +#define _UAPI_ASM_M68K_BOOTINFO_Q40_H + + + /* + * Latest Q40 bootinfo version + */ + +#define Q40_BOOTI_VERSION MK_BI_VERSION(2, 0) + + +#endif /* _UAPI_ASM_M68K_BOOTINFO_Q40_H */ diff --git a/arch/m68k/include/uapi/asm/bootinfo-vme.h b/arch/m68k/include/uapi/asm/bootinfo-vme.h new file mode 100644 index 000000000000..4643a55ed768 --- /dev/null +++ b/arch/m68k/include/uapi/asm/bootinfo-vme.h @@ -0,0 +1,46 @@ +/* +** asm/bootinfo-vme.h -- VME-specific boot information definitions +*/ + +#ifndef _UAPI_ASM_M68K_BOOTINFO_VME_H +#define _UAPI_ASM_M68K_BOOTINFO_VME_H + + + /* + * VME-specific tags + */ + +#define BI_VME_TYPE 0x8000 /* VME sub-architecture (u_long) */ +#define BI_VME_BRDINFO 0x8001 /* VME board information (struct) */ + + + /* + * VME models (BI_VME_TYPE) + */ + +#define VME_TYPE_TP34V 0x0034 /* Tadpole TP34V */ +#define VME_TYPE_MVME147 0x0147 /* Motorola MVME147 */ +#define VME_TYPE_MVME162 0x0162 /* Motorola MVME162 */ +#define VME_TYPE_MVME166 0x0166 /* Motorola MVME166 */ +#define VME_TYPE_MVME167 0x0167 /* Motorola MVME167 */ +#define VME_TYPE_MVME172 0x0172 /* Motorola MVME172 */ +#define VME_TYPE_MVME177 0x0177 /* Motorola MVME177 */ +#define VME_TYPE_BVME4000 0x4000 /* BVM Ltd. BVME4000 */ +#define VME_TYPE_BVME6000 0x6000 /* BVM Ltd. BVME6000 */ + +/* BI_VME_BRDINFO is a 32 byte struct as returned by the Bug code on + * Motorola VME boards. Contains board number, Bug version, board + * configuration options, etc. See include/asm/mvme16xhw.h for details. + */ + + + /* + * Latest VME bootinfo versions + */ + +#define MVME147_BOOTI_VERSION MK_BI_VERSION(2, 0) +#define MVME16x_BOOTI_VERSION MK_BI_VERSION(2, 0) +#define BVME6000_BOOTI_VERSION MK_BI_VERSION(2, 0) + + +#endif /* _UAPI_ASM_M68K_BOOTINFO_VME_H */ diff --git a/arch/m68k/include/uapi/asm/bootinfo.h b/arch/m68k/include/uapi/asm/bootinfo.h new file mode 100644 index 000000000000..74a6fae2a5bf --- /dev/null +++ b/arch/m68k/include/uapi/asm/bootinfo.h @@ -0,0 +1,95 @@ +/* + * asm/bootinfo.h -- Definition of the Linux/m68k boot information structure + * + * Copyright 1992 by Greg Harp + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive + * for more details. + */ + +#ifndef _UAPI_ASM_M68K_BOOTINFO_H +#define _UAPI_ASM_M68K_BOOTINFO_H + +#include + + +#ifndef __ASSEMBLY__ + + /* + * Bootinfo definitions + * + * This is an easily parsable and extendable structure containing all + * information to be passed from the bootstrap to the kernel. + * + * This way I hope to keep all future changes back/forewards compatible. + * Thus, keep your fingers crossed... + * + * This structure is copied right after the kernel by the bootstrap + * routine. + */ + +struct bi_record { + unsigned short tag; /* tag ID */ + unsigned short size; /* size of record (in bytes) */ + unsigned long data[0]; /* data */ +}; + +#endif /* __ASSEMBLY__ */ + + + /* + * Tag Definitions + * + * Machine independent tags start counting from 0x0000 + * Machine dependent tags start counting from 0x8000 + */ + +#define BI_LAST 0x0000 /* last record (sentinel) */ +#define BI_MACHTYPE 0x0001 /* machine type (u_long) */ +#define BI_CPUTYPE 0x0002 /* cpu type (u_long) */ +#define BI_FPUTYPE 0x0003 /* fpu type (u_long) */ +#define BI_MMUTYPE 0x0004 /* mmu type (u_long) */ +#define BI_MEMCHUNK 0x0005 /* memory chunk address and size */ + /* (struct mem_info) */ +#define BI_RAMDISK 0x0006 /* ramdisk address and size */ + /* (struct mem_info) */ +#define BI_COMMAND_LINE 0x0007 /* kernel command line parameters */ + /* (string) */ + + + /* + * Stuff for bootinfo interface versioning + * + * At the start of kernel code, a 'struct bootversion' is located. + * bootstrap checks for a matching version of the interface before booting + * a kernel, to avoid user confusion if kernel and bootstrap don't work + * together :-) + * + * If incompatible changes are made to the bootinfo interface, the major + * number below should be stepped (and the minor reset to 0) for the + * appropriate machine. If a change is backward-compatible, the minor + * should be stepped. "Backwards-compatible" means that booting will work, + * but certain features may not. + */ + +#define BOOTINFOV_MAGIC 0x4249561A /* 'BIV^Z' */ +#define MK_BI_VERSION(major, minor) (((major) << 16) + (minor)) +#define BI_VERSION_MAJOR(v) (((v) >> 16) & 0xffff) +#define BI_VERSION_MINOR(v) ((v) & 0xffff) + +#ifndef __ASSEMBLY__ + +struct bootversion { + unsigned short branch; + unsigned long magic; + struct { + unsigned long machtype; + unsigned long version; + } machversions[0]; +} __packed; + +#endif /* __ASSEMBLY__ */ + + +#endif /* _UAPI_ASM_M68K_BOOTINFO_H */ diff --git a/arch/m68k/kernel/head.S b/arch/m68k/kernel/head.S index 28f817481f3b..7d7913f5dce3 100644 --- a/arch/m68k/kernel/head.S +++ b/arch/m68k/kernel/head.S @@ -257,6 +257,12 @@ #include #include #include +#include +#include +#include +#include +#include +#include #include #include #include diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c index 677244e0371c..e48069da04ed 100644 --- a/arch/m68k/mac/config.c +++ b/arch/m68k/mac/config.c @@ -28,6 +28,7 @@ #include #include +#include #include #include diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c index 5c99154c67c8..3a1d47564e52 100644 --- a/arch/m68k/mvme147/config.c +++ b/arch/m68k/mvme147/config.c @@ -26,6 +26,7 @@ #include #include +#include #include #include #include diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c index 60d8a1bc837d..e05994fd048c 100644 --- a/arch/m68k/mvme16x/config.c +++ b/arch/m68k/mvme16x/config.c @@ -29,6 +29,7 @@ #include #include +#include #include #include #include From 799300840c18e7fdc0a3dace70d9b56a189fd1ab Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 2 Oct 2013 21:50:56 +0200 Subject: [PATCH 058/981] m68k/UAPI: Move generic definitions to Move generic definitions used by bootstraps to uapi/asm/bootinfo.h: - Machine types, - CPU, FPU, and MMU types, - struct mem_info. Keep a copy of struct mem_info for in-kernel use, and rename it to struct m68k_mem_info, as the exported one will be modified later. Signed-off-by: Geert Uytterhoeven --- arch/m68k/include/asm/setup.h | 5 +- arch/m68k/include/uapi/asm/bootinfo.h | 80 +++++++++++++++++++++++- arch/m68k/include/uapi/asm/setup.h | 87 --------------------------- arch/m68k/kernel/setup_mm.c | 4 +- arch/m68k/mm/init.c | 2 +- arch/m68k/mm/motorola.c | 2 +- 6 files changed, 85 insertions(+), 95 deletions(-) diff --git a/arch/m68k/include/asm/setup.h b/arch/m68k/include/asm/setup.h index 65e78a2dad64..8f2023f8c1c4 100644 --- a/arch/m68k/include/asm/setup.h +++ b/arch/m68k/include/asm/setup.h @@ -22,6 +22,7 @@ #ifndef _M68K_SETUP_H #define _M68K_SETUP_H +#include #include @@ -297,14 +298,14 @@ extern int m68k_is040or060; #define NUM_MEMINFO 4 #ifndef __ASSEMBLY__ -struct mem_info { +struct m68k_mem_info { unsigned long addr; /* physical address of memory chunk */ unsigned long size; /* length of memory chunk (in bytes) */ }; extern int m68k_num_memory; /* # of memory blocks found (and used) */ extern int m68k_realnum_memory; /* real # of memory blocks found */ -extern struct mem_info m68k_memory[NUM_MEMINFO];/* memory description */ +extern struct m68k_mem_info m68k_memory[NUM_MEMINFO];/* memory description */ #endif #endif /* _M68K_SETUP_H */ diff --git a/arch/m68k/include/uapi/asm/bootinfo.h b/arch/m68k/include/uapi/asm/bootinfo.h index 74a6fae2a5bf..9d64599d7bf5 100644 --- a/arch/m68k/include/uapi/asm/bootinfo.h +++ b/arch/m68k/include/uapi/asm/bootinfo.h @@ -11,8 +11,6 @@ #ifndef _UAPI_ASM_M68K_BOOTINFO_H #define _UAPI_ASM_M68K_BOOTINFO_H -#include - #ifndef __ASSEMBLY__ @@ -35,6 +33,12 @@ struct bi_record { unsigned long data[0]; /* data */ }; + +struct mem_info { + unsigned long addr; /* physical address of memory chunk */ + unsigned long size; /* length of memory chunk (in bytes) */ +}; + #endif /* __ASSEMBLY__ */ @@ -58,6 +62,78 @@ struct bi_record { /* (string) */ + /* + * Linux/m68k Architectures (BI_MACHTYPE) + */ + +#define MACH_AMIGA 1 +#define MACH_ATARI 2 +#define MACH_MAC 3 +#define MACH_APOLLO 4 +#define MACH_SUN3 5 +#define MACH_MVME147 6 +#define MACH_MVME16x 7 +#define MACH_BVME6000 8 +#define MACH_HP300 9 +#define MACH_Q40 10 +#define MACH_SUN3X 11 +#define MACH_M54XX 12 + + + /* + * CPU, FPU and MMU types (BI_CPUTYPE, BI_FPUTYPE, BI_MMUTYPE) + * + * Note: we may rely on the following equalities: + * + * CPU_68020 == MMU_68851 + * CPU_68030 == MMU_68030 + * CPU_68040 == FPU_68040 == MMU_68040 + * CPU_68060 == FPU_68060 == MMU_68060 + */ + +#define CPUB_68020 0 +#define CPUB_68030 1 +#define CPUB_68040 2 +#define CPUB_68060 3 +#define CPUB_COLDFIRE 4 + +#define CPU_68020 (1 << CPUB_68020) +#define CPU_68030 (1 << CPUB_68030) +#define CPU_68040 (1 << CPUB_68040) +#define CPU_68060 (1 << CPUB_68060) +#define CPU_COLDFIRE (1 << CPUB_COLDFIRE) + +#define FPUB_68881 0 +#define FPUB_68882 1 +#define FPUB_68040 2 /* Internal FPU */ +#define FPUB_68060 3 /* Internal FPU */ +#define FPUB_SUNFPA 4 /* Sun-3 FPA */ +#define FPUB_COLDFIRE 5 /* ColdFire FPU */ + +#define FPU_68881 (1 << FPUB_68881) +#define FPU_68882 (1 << FPUB_68882) +#define FPU_68040 (1 << FPUB_68040) +#define FPU_68060 (1 << FPUB_68060) +#define FPU_SUNFPA (1 << FPUB_SUNFPA) +#define FPU_COLDFIRE (1 << FPUB_COLDFIRE) + +#define MMUB_68851 0 +#define MMUB_68030 1 /* Internal MMU */ +#define MMUB_68040 2 /* Internal MMU */ +#define MMUB_68060 3 /* Internal MMU */ +#define MMUB_APOLLO 4 /* Custom Apollo */ +#define MMUB_SUN3 5 /* Custom Sun-3 */ +#define MMUB_COLDFIRE 6 /* Internal MMU */ + +#define MMU_68851 (1 << MMUB_68851) +#define MMU_68030 (1 << MMUB_68030) +#define MMU_68040 (1 << MMUB_68040) +#define MMU_68060 (1 << MMUB_68060) +#define MMU_SUN3 (1 << MMUB_SUN3) +#define MMU_APOLLO (1 << MMUB_APOLLO) +#define MMU_COLDFIRE (1 << MMUB_COLDFIRE) + + /* * Stuff for bootinfo interface versioning * diff --git a/arch/m68k/include/uapi/asm/setup.h b/arch/m68k/include/uapi/asm/setup.h index 85579bff455c..6a6dc636761e 100644 --- a/arch/m68k/include/uapi/asm/setup.h +++ b/arch/m68k/include/uapi/asm/setup.h @@ -6,98 +6,11 @@ ** This file is subject to the terms and conditions of the GNU General Public ** License. See the file COPYING in the main directory of this archive ** for more details. -** -** Created 09/29/92 by Greg Harp -** -** 5/2/94 Roman Hodek: -** Added bi_atari part of the machine dependent union bi_un; for now it -** contains just a model field to distinguish between TT and Falcon. -** 26/7/96 Roman Zippel: -** Renamed to setup.h; added some useful macros to allow gcc some -** optimizations if possible. -** 5/10/96 Geert Uytterhoeven: -** Redesign of the boot information structure; moved boot information -** structure to bootinfo.h */ #ifndef _UAPI_M68K_SETUP_H #define _UAPI_M68K_SETUP_H - - - /* - * Linux/m68k Architectures - */ - -#define MACH_AMIGA 1 -#define MACH_ATARI 2 -#define MACH_MAC 3 -#define MACH_APOLLO 4 -#define MACH_SUN3 5 -#define MACH_MVME147 6 -#define MACH_MVME16x 7 -#define MACH_BVME6000 8 -#define MACH_HP300 9 -#define MACH_Q40 10 -#define MACH_SUN3X 11 -#define MACH_M54XX 12 - #define COMMAND_LINE_SIZE 256 - - - /* - * CPU, FPU and MMU types - * - * Note: we may rely on the following equalities: - * - * CPU_68020 == MMU_68851 - * CPU_68030 == MMU_68030 - * CPU_68040 == FPU_68040 == MMU_68040 - * CPU_68060 == FPU_68060 == MMU_68060 - */ - -#define CPUB_68020 0 -#define CPUB_68030 1 -#define CPUB_68040 2 -#define CPUB_68060 3 -#define CPUB_COLDFIRE 4 - -#define CPU_68020 (1<addr) >> __virt_to_node_shift(); diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c index 251c5437787b..7d4024432163 100644 --- a/arch/m68k/mm/motorola.c +++ b/arch/m68k/mm/motorola.c @@ -233,7 +233,7 @@ void __init paging_init(void) printk("Fix your bootloader or use a memfile to make use of this area!\n"); m68k_num_memory--; memmove(m68k_memory + i, m68k_memory + i + 1, - (m68k_num_memory - i) * sizeof(struct mem_info)); + (m68k_num_memory - i) * sizeof(struct m68k_mem_info)); continue; } addr = m68k_memory[i].addr + m68k_memory[i].size; From 7bc449688bf73891625616f3f2e93e359603016b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 3 Oct 2013 11:26:22 +0200 Subject: [PATCH 059/981] m68k/UAPI: Move Amiga model/chipset definitions to Signed-off-by: Geert Uytterhoeven --- arch/m68k/include/asm/amigahw.h | 26 +---------------- arch/m68k/include/uapi/asm/bootinfo-amiga.h | 32 +++++++++++++++++++++ 2 files changed, 33 insertions(+), 25 deletions(-) diff --git a/arch/m68k/include/asm/amigahw.h b/arch/m68k/include/asm/amigahw.h index 3bef0b2ad945..5ad568110f17 100644 --- a/arch/m68k/include/asm/amigahw.h +++ b/arch/m68k/include/asm/amigahw.h @@ -18,26 +18,7 @@ #include - /* - * Different Amiga models - */ - -#define AMI_UNKNOWN (0) -#define AMI_500 (1) -#define AMI_500PLUS (2) -#define AMI_600 (3) -#define AMI_1000 (4) -#define AMI_1200 (5) -#define AMI_2000 (6) -#define AMI_2500 (7) -#define AMI_3000 (8) -#define AMI_3000T (9) -#define AMI_3000PLUS (10) -#define AMI_4000 (11) -#define AMI_4000T (12) -#define AMI_CDTV (13) -#define AMI_CD32 (14) -#define AMI_DRACO (15) +#include /* @@ -46,11 +27,6 @@ extern unsigned long amiga_chipset; -#define CS_STONEAGE (0) -#define CS_OCS (1) -#define CS_ECS (2) -#define CS_AGA (3) - /* * Miscellaneous diff --git a/arch/m68k/include/uapi/asm/bootinfo-amiga.h b/arch/m68k/include/uapi/asm/bootinfo-amiga.h index 2c32d671f232..28b6da07874a 100644 --- a/arch/m68k/include/uapi/asm/bootinfo-amiga.h +++ b/arch/m68k/include/uapi/asm/bootinfo-amiga.h @@ -21,6 +21,38 @@ #define BI_AMIGA_SERPER 0x8007 /* serial port period (u_short) */ + /* + * Amiga models (BI_AMIGA_MODEL) + */ + +#define AMI_UNKNOWN 0 +#define AMI_500 1 +#define AMI_500PLUS 2 +#define AMI_600 3 +#define AMI_1000 4 +#define AMI_1200 5 +#define AMI_2000 6 +#define AMI_2500 7 +#define AMI_3000 8 +#define AMI_3000T 9 +#define AMI_3000PLUS 10 +#define AMI_4000 11 +#define AMI_4000T 12 +#define AMI_CDTV 13 +#define AMI_CD32 14 +#define AMI_DRACO 15 + + + /* + * Amiga chipsets (BI_AMIGA_CHIPSET) + */ + +#define CS_STONEAGE 0 +#define CS_OCS 1 +#define CS_ECS 2 +#define CS_AGA 3 + + /* * Latest Amiga bootinfo version */ From 7678e77d2a2fb2dd74a27b8283e030c580294a62 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 3 Oct 2013 11:31:08 +0200 Subject: [PATCH 060/981] m68k/UAPI: Move Apollo model definitions to Signed-off-by: Geert Uytterhoeven --- arch/m68k/include/asm/apollohw.h | 11 ++--------- arch/m68k/include/uapi/asm/bootinfo-apollo.h | 12 ++++++++++++ 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/arch/m68k/include/asm/apollohw.h b/arch/m68k/include/asm/apollohw.h index 6c19e0c22411..87fc899d32ee 100644 --- a/arch/m68k/include/asm/apollohw.h +++ b/arch/m68k/include/asm/apollohw.h @@ -5,18 +5,11 @@ #include -/* - apollo models -*/ +#include + extern u_long apollo_model; -#define APOLLO_UNKNOWN (0) -#define APOLLO_DN3000 (1) -#define APOLLO_DN3010 (2) -#define APOLLO_DN3500 (3) -#define APOLLO_DN4000 (4) -#define APOLLO_DN4500 (5) /* see scn2681 data sheet for more info. diff --git a/arch/m68k/include/uapi/asm/bootinfo-apollo.h b/arch/m68k/include/uapi/asm/bootinfo-apollo.h index 3a2051e822b0..69923697e131 100644 --- a/arch/m68k/include/uapi/asm/bootinfo-apollo.h +++ b/arch/m68k/include/uapi/asm/bootinfo-apollo.h @@ -13,4 +13,16 @@ #define BI_APOLLO_MODEL 0x8000 /* model (u_long) */ + /* + * Apollo models (BI_APOLLO_MODEL) + */ + +#define APOLLO_UNKNOWN 0 +#define APOLLO_DN3000 1 +#define APOLLO_DN3010 2 +#define APOLLO_DN3500 3 +#define APOLLO_DN4000 4 +#define APOLLO_DN4500 5 + + #endif /* _UAPI_ASM_M68K_BOOTINFO_APOLLO_H */ From f3bd09e3dba76e76bec7e79a442f37b0762f4fa1 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 3 Oct 2013 11:36:52 +0200 Subject: [PATCH 061/981] m68k/UAPI: Move HP300 model definitions to Signed-off-by: Geert Uytterhoeven --- arch/m68k/include/asm/hp300hw.h | 22 +++--------------- arch/m68k/include/uapi/asm/bootinfo-hp300.h | 25 +++++++++++++++++++++ 2 files changed, 28 insertions(+), 19 deletions(-) diff --git a/arch/m68k/include/asm/hp300hw.h b/arch/m68k/include/asm/hp300hw.h index d998ea67c19c..64f5271dd7be 100644 --- a/arch/m68k/include/asm/hp300hw.h +++ b/arch/m68k/include/asm/hp300hw.h @@ -1,25 +1,9 @@ #ifndef _M68K_HP300HW_H #define _M68K_HP300HW_H +#include + + extern unsigned long hp300_model; -/* This information was taken from NetBSD */ -#define HP_320 (0) /* 16MHz 68020+HP MMU+16K external cache */ -#define HP_330 (1) /* 16MHz 68020+68851 MMU */ -#define HP_340 (2) /* 16MHz 68030 */ -#define HP_345 (3) /* 50MHz 68030+32K external cache */ -#define HP_350 (4) /* 25MHz 68020+HP MMU+32K external cache */ -#define HP_360 (5) /* 25MHz 68030 */ -#define HP_370 (6) /* 33MHz 68030+64K external cache */ -#define HP_375 (7) /* 50MHz 68030+32K external cache */ -#define HP_380 (8) /* 25MHz 68040 */ -#define HP_385 (9) /* 33MHz 68040 */ - -#define HP_400 (10) /* 50MHz 68030+32K external cache */ -#define HP_425T (11) /* 25MHz 68040 - model 425t */ -#define HP_425S (12) /* 25MHz 68040 - model 425s */ -#define HP_425E (13) /* 25MHz 68040 - model 425e */ -#define HP_433T (14) /* 33MHz 68040 - model 433t */ -#define HP_433S (15) /* 33MHz 68040 - model 433s */ - #endif /* _M68K_HP300HW_H */ diff --git a/arch/m68k/include/uapi/asm/bootinfo-hp300.h b/arch/m68k/include/uapi/asm/bootinfo-hp300.h index e7ed19cdafe5..08530e12d4c3 100644 --- a/arch/m68k/include/uapi/asm/bootinfo-hp300.h +++ b/arch/m68k/include/uapi/asm/bootinfo-hp300.h @@ -15,6 +15,31 @@ #define BI_HP300_UART_ADDR 0x8002 /* phys. addr of UART (u_long) */ + /* + * HP9000/300 and /400 models (BI_HP300_MODEL) + * + * This information was taken from NetBSD + */ + +#define HP_320 0 /* 16MHz 68020+HP MMU+16K external cache */ +#define HP_330 1 /* 16MHz 68020+68851 MMU */ +#define HP_340 2 /* 16MHz 68030 */ +#define HP_345 3 /* 50MHz 68030+32K external cache */ +#define HP_350 4 /* 25MHz 68020+HP MMU+32K external cache */ +#define HP_360 5 /* 25MHz 68030 */ +#define HP_370 6 /* 33MHz 68030+64K external cache */ +#define HP_375 7 /* 50MHz 68030+32K external cache */ +#define HP_380 8 /* 25MHz 68040 */ +#define HP_385 9 /* 33MHz 68040 */ + +#define HP_400 10 /* 50MHz 68030+32K external cache */ +#define HP_425T 11 /* 25MHz 68040 - model 425t */ +#define HP_425S 12 /* 25MHz 68040 - model 425s */ +#define HP_425E 13 /* 25MHz 68040 - model 425e */ +#define HP_433T 14 /* 33MHz 68040 - model 433t */ +#define HP_433S 15 /* 33MHz 68040 - model 433s */ + + /* * Latest HP9000/300 bootinfo version */ From 8693d6167e16baab1ee900d94b16af586a26a916 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 3 Oct 2013 11:40:00 +0200 Subject: [PATCH 062/981] m68k/UAPI: Move Macintosh model definitions to Signed-off-by: Geert Uytterhoeven --- arch/m68k/include/asm/macintosh.h | 62 ++--------------------- arch/m68k/include/uapi/asm/bootinfo-mac.h | 60 ++++++++++++++++++++++ 2 files changed, 63 insertions(+), 59 deletions(-) diff --git a/arch/m68k/include/asm/macintosh.h b/arch/m68k/include/asm/macintosh.h index 6a633eb64331..d323b2c2d07d 100644 --- a/arch/m68k/include/asm/macintosh.h +++ b/arch/m68k/include/asm/macintosh.h @@ -4,6 +4,9 @@ #include #include +#include + + /* * Apple Macintoshisms */ @@ -74,65 +77,6 @@ struct mac_model #define MAC_FLOPPY_SWIM_IOP 3 #define MAC_FLOPPY_AV 4 -/* - * Gestalt numbers - */ - -#define MAC_MODEL_II 6 -#define MAC_MODEL_IIX 7 -#define MAC_MODEL_IICX 8 -#define MAC_MODEL_SE30 9 -#define MAC_MODEL_IICI 11 -#define MAC_MODEL_IIFX 13 /* And well numbered it is too */ -#define MAC_MODEL_IISI 18 -#define MAC_MODEL_LC 19 -#define MAC_MODEL_Q900 20 -#define MAC_MODEL_PB170 21 -#define MAC_MODEL_Q700 22 -#define MAC_MODEL_CLII 23 /* aka: P200 */ -#define MAC_MODEL_PB140 25 -#define MAC_MODEL_Q950 26 /* aka: WGS95 */ -#define MAC_MODEL_LCIII 27 /* aka: P450 */ -#define MAC_MODEL_PB210 29 -#define MAC_MODEL_C650 30 -#define MAC_MODEL_PB230 32 -#define MAC_MODEL_PB180 33 -#define MAC_MODEL_PB160 34 -#define MAC_MODEL_Q800 35 /* aka: WGS80 */ -#define MAC_MODEL_Q650 36 -#define MAC_MODEL_LCII 37 /* aka: P400/405/410/430 */ -#define MAC_MODEL_PB250 38 -#define MAC_MODEL_IIVI 44 -#define MAC_MODEL_P600 45 /* aka: P600CD */ -#define MAC_MODEL_IIVX 48 -#define MAC_MODEL_CCL 49 /* aka: P250 */ -#define MAC_MODEL_PB165C 50 -#define MAC_MODEL_C610 52 /* aka: WGS60 */ -#define MAC_MODEL_Q610 53 -#define MAC_MODEL_PB145 54 /* aka: PB145B */ -#define MAC_MODEL_P520 56 /* aka: LC520 */ -#define MAC_MODEL_C660 60 -#define MAC_MODEL_P460 62 /* aka: LCIII+, P466/P467 */ -#define MAC_MODEL_PB180C 71 -#define MAC_MODEL_PB520 72 /* aka: PB520C, PB540, PB540C, PB550C */ -#define MAC_MODEL_PB270C 77 -#define MAC_MODEL_Q840 78 -#define MAC_MODEL_P550 80 /* aka: LC550, P560 */ -#define MAC_MODEL_CCLII 83 /* aka: P275 */ -#define MAC_MODEL_PB165 84 -#define MAC_MODEL_PB190 85 /* aka: PB190CS */ -#define MAC_MODEL_TV 88 -#define MAC_MODEL_P475 89 /* aka: LC475, P476 */ -#define MAC_MODEL_P475F 90 /* aka: P475 w/ FPU (no LC040) */ -#define MAC_MODEL_P575 92 /* aka: LC575, P577/P578 */ -#define MAC_MODEL_Q605 94 -#define MAC_MODEL_Q605_ACC 95 /* Q605 accelerated to 33 MHz */ -#define MAC_MODEL_Q630 98 /* aka: LC630, P630/631/635/636/637/638/640 */ -#define MAC_MODEL_P588 99 /* aka: LC580, P580 */ -#define MAC_MODEL_PB280 102 -#define MAC_MODEL_PB280C 103 -#define MAC_MODEL_PB150 115 - extern struct mac_model *macintosh_config; diff --git a/arch/m68k/include/uapi/asm/bootinfo-mac.h b/arch/m68k/include/uapi/asm/bootinfo-mac.h index d4ef2115c8f9..971046f49bb9 100644 --- a/arch/m68k/include/uapi/asm/bootinfo-mac.h +++ b/arch/m68k/include/uapi/asm/bootinfo-mac.h @@ -49,6 +49,66 @@ #define BI_MAC_IOP_ADB 0x8021 /* Mac ADB IOP */ + /* + * Macintosh Gestalt numbers (BI_MAC_MODEL) + */ + +#define MAC_MODEL_II 6 +#define MAC_MODEL_IIX 7 +#define MAC_MODEL_IICX 8 +#define MAC_MODEL_SE30 9 +#define MAC_MODEL_IICI 11 +#define MAC_MODEL_IIFX 13 /* And well numbered it is too */ +#define MAC_MODEL_IISI 18 +#define MAC_MODEL_LC 19 +#define MAC_MODEL_Q900 20 +#define MAC_MODEL_PB170 21 +#define MAC_MODEL_Q700 22 +#define MAC_MODEL_CLII 23 /* aka: P200 */ +#define MAC_MODEL_PB140 25 +#define MAC_MODEL_Q950 26 /* aka: WGS95 */ +#define MAC_MODEL_LCIII 27 /* aka: P450 */ +#define MAC_MODEL_PB210 29 +#define MAC_MODEL_C650 30 +#define MAC_MODEL_PB230 32 +#define MAC_MODEL_PB180 33 +#define MAC_MODEL_PB160 34 +#define MAC_MODEL_Q800 35 /* aka: WGS80 */ +#define MAC_MODEL_Q650 36 +#define MAC_MODEL_LCII 37 /* aka: P400/405/410/430 */ +#define MAC_MODEL_PB250 38 +#define MAC_MODEL_IIVI 44 +#define MAC_MODEL_P600 45 /* aka: P600CD */ +#define MAC_MODEL_IIVX 48 +#define MAC_MODEL_CCL 49 /* aka: P250 */ +#define MAC_MODEL_PB165C 50 +#define MAC_MODEL_C610 52 /* aka: WGS60 */ +#define MAC_MODEL_Q610 53 +#define MAC_MODEL_PB145 54 /* aka: PB145B */ +#define MAC_MODEL_P520 56 /* aka: LC520 */ +#define MAC_MODEL_C660 60 +#define MAC_MODEL_P460 62 /* aka: LCIII+, P466/P467 */ +#define MAC_MODEL_PB180C 71 +#define MAC_MODEL_PB520 72 /* aka: PB520C, PB540, PB540C, PB550C */ +#define MAC_MODEL_PB270C 77 +#define MAC_MODEL_Q840 78 +#define MAC_MODEL_P550 80 /* aka: LC550, P560 */ +#define MAC_MODEL_CCLII 83 /* aka: P275 */ +#define MAC_MODEL_PB165 84 +#define MAC_MODEL_PB190 85 /* aka: PB190CS */ +#define MAC_MODEL_TV 88 +#define MAC_MODEL_P475 89 /* aka: LC475, P476 */ +#define MAC_MODEL_P475F 90 /* aka: P475 w/ FPU (no LC040) */ +#define MAC_MODEL_P575 92 /* aka: LC575, P577/P578 */ +#define MAC_MODEL_Q605 94 +#define MAC_MODEL_Q605_ACC 95 /* Q605 accelerated to 33 MHz */ +#define MAC_MODEL_Q630 98 /* aka: LC630, P630/631/635/636/637/638/640 */ +#define MAC_MODEL_P588 99 /* aka: LC580, P580 */ +#define MAC_MODEL_PB280 102 +#define MAC_MODEL_PB280C 103 +#define MAC_MODEL_PB150 115 + + /* * Latest Macintosh bootinfo version */ From cf288bd5b122d12476fd7d9825c292daef5dba58 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 2 Oct 2013 22:40:44 +0200 Subject: [PATCH 063/981] m68k/UAPI: Move VME Board ID definition to Signed-off-by: Geert Uytterhoeven --- arch/m68k/include/asm/mvme16xhw.h | 15 -------------- arch/m68k/include/uapi/asm/bootinfo-vme.h | 25 +++++++++++++++++++++-- 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/arch/m68k/include/asm/mvme16xhw.h b/arch/m68k/include/asm/mvme16xhw.h index 87fa84ad252e..1eb89de631e5 100644 --- a/arch/m68k/include/asm/mvme16xhw.h +++ b/arch/m68k/include/asm/mvme16xhw.h @@ -3,21 +3,6 @@ #include -/* Board ID data structure - pointer to this retrieved from Bug by head.S */ - -/* Note, bytes 12 and 13 are board no in BCD (0162,0166,0167,0177,etc) */ - -typedef struct { - char bdid[4]; - u_char rev, mth, day, yr; - u_short size, reserved; - u_short brdno; - char brdsuffix[2]; - u_long options; - u_short clun, dlun, ctype, dnum; - u_long option2; -} t_bdid, *p_bdid; - typedef struct { u_char ack_icr, diff --git a/arch/m68k/include/uapi/asm/bootinfo-vme.h b/arch/m68k/include/uapi/asm/bootinfo-vme.h index 4643a55ed768..13ba5e19fe24 100644 --- a/arch/m68k/include/uapi/asm/bootinfo-vme.h +++ b/arch/m68k/include/uapi/asm/bootinfo-vme.h @@ -28,11 +28,32 @@ #define VME_TYPE_BVME4000 0x4000 /* BVM Ltd. BVME4000 */ #define VME_TYPE_BVME6000 0x6000 /* BVM Ltd. BVME6000 */ -/* BI_VME_BRDINFO is a 32 byte struct as returned by the Bug code on + +#ifndef __ASSEMBLY__ + +/* + * Board ID data structure - pointer to this retrieved from Bug by head.S + * + * BI_VME_BRDINFO is a 32 byte struct as returned by the Bug code on * Motorola VME boards. Contains board number, Bug version, board - * configuration options, etc. See include/asm/mvme16xhw.h for details. + * configuration options, etc. + * + * Note, bytes 12 and 13 are board no in BCD (0162,0166,0167,0177,etc) */ +typedef struct { + char bdid[4]; + u_char rev, mth, day, yr; + u_short size, reserved; + u_short brdno; + char brdsuffix[2]; + u_long options; + u_short clun, dlun, ctype, dnum; + u_long option2; +} t_bdid, *p_bdid; + +#endif /* __ASSEMBLY__ */ + /* * Latest VME bootinfo versions From abe48101c17eaf1b5d85270272392e6111562626 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 4 Oct 2013 11:41:24 +0200 Subject: [PATCH 064/981] m68k/UAPI: Use proper types (endianness/size) in Signed-off-by: Geert Uytterhoeven --- arch/m68k/amiga/config.c | 18 +++++------ arch/m68k/apollo/config.c | 15 ++++----- arch/m68k/atari/config.c | 9 +++--- arch/m68k/bvme6000/config.c | 3 +- arch/m68k/hp300/config.c | 9 +++--- arch/m68k/include/uapi/asm/bootinfo-amiga.h | 14 ++++----- arch/m68k/include/uapi/asm/bootinfo-apollo.h | 2 +- arch/m68k/include/uapi/asm/bootinfo-atari.h | 4 +-- arch/m68k/include/uapi/asm/bootinfo-hp300.h | 6 ++-- arch/m68k/include/uapi/asm/bootinfo-mac.h | 2 +- arch/m68k/include/uapi/asm/bootinfo-vme.h | 17 +++++----- arch/m68k/include/uapi/asm/bootinfo.h | 29 +++++++++-------- arch/m68k/kernel/setup_mm.c | 33 +++++++++++++------- arch/m68k/mac/config.c | 32 ++++++++++--------- arch/m68k/mvme147/config.c | 4 ++- arch/m68k/mvme16x/config.c | 27 +++++++++------- 16 files changed, 125 insertions(+), 99 deletions(-) diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c index 0c92c1baf9bf..06920e85fac9 100644 --- a/arch/m68k/amiga/config.c +++ b/arch/m68k/amiga/config.c @@ -145,37 +145,37 @@ static struct resource ram_resource[NUM_MEMINFO]; int __init amiga_parse_bootinfo(const struct bi_record *record) { int unknown = 0; - const unsigned long *data = record->data; + const void *data = record->data; - switch (record->tag) { + switch (be16_to_cpu(record->tag)) { case BI_AMIGA_MODEL: - amiga_model = *data; + amiga_model = be32_to_cpup(data); break; case BI_AMIGA_ECLOCK: - amiga_eclock = *data; + amiga_eclock = be32_to_cpup(data); break; case BI_AMIGA_CHIPSET: - amiga_chipset = *data; + amiga_chipset = be32_to_cpup(data); break; case BI_AMIGA_CHIP_SIZE: - amiga_chip_size = *(const int *)data; + amiga_chip_size = be32_to_cpup(data); break; case BI_AMIGA_VBLANK: - amiga_vblank = *(const unsigned char *)data; + amiga_vblank = *(const __u8 *)data; break; case BI_AMIGA_PSFREQ: - amiga_psfreq = *(const unsigned char *)data; + amiga_psfreq = *(const __u8 *)data; break; case BI_AMIGA_AUTOCON: #ifdef CONFIG_ZORRO if (zorro_num_autocon < ZORRO_NUM_AUTO) { - const struct ConfigDev *cd = (struct ConfigDev *)data; + const struct ConfigDev *cd = data; struct zorro_dev_init *dev = &zorro_autocon_init[zorro_num_autocon++]; dev->rom = cd->cd_Rom; dev->slotaddr = be16_to_cpu(cd->cd_SlotAddr); diff --git a/arch/m68k/apollo/config.c b/arch/m68k/apollo/config.c index c90c19e904bc..9268c0f96376 100644 --- a/arch/m68k/apollo/config.c +++ b/arch/m68k/apollo/config.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -48,15 +49,15 @@ static const char *apollo_models[] = { int __init apollo_parse_bootinfo(const struct bi_record *record) { int unknown = 0; - const unsigned long *data = record->data; + const void *data = record->data; - switch(record->tag) { - case BI_APOLLO_MODEL: - apollo_model=*data; - break; + switch (be16_to_cpu(record->tag)) { + case BI_APOLLO_MODEL: + apollo_model = be32_to_cpup(data); + break; - default: - unknown=1; + default: + unknown=1; } return unknown; diff --git a/arch/m68k/atari/config.c b/arch/m68k/atari/config.c index 9159195505d5..01a62161b08a 100644 --- a/arch/m68k/atari/config.c +++ b/arch/m68k/atari/config.c @@ -38,6 +38,7 @@ #include #include +#include #include #include #include @@ -130,14 +131,14 @@ static int __init scc_test(volatile char *ctla) int __init atari_parse_bootinfo(const struct bi_record *record) { int unknown = 0; - const u_long *data = record->data; + const void *data = record->data; - switch (record->tag) { + switch (be16_to_cpu(record->tag)) { case BI_ATARI_MCH_COOKIE: - atari_mch_cookie = *data; + atari_mch_cookie = be32_to_cpup(data); break; case BI_ATARI_MCH_TYPE: - atari_mch_type = *data; + atari_mch_type = be32_to_cpup(data); break; default: unknown = 1; diff --git a/arch/m68k/bvme6000/config.c b/arch/m68k/bvme6000/config.c index 4d1b403822fa..478623dbb209 100644 --- a/arch/m68k/bvme6000/config.c +++ b/arch/m68k/bvme6000/config.c @@ -29,6 +29,7 @@ #include #include +#include #include #include #include @@ -53,7 +54,7 @@ static irq_handler_t tick_handler; int __init bvme6000_parse_bootinfo(const struct bi_record *bi) { - if (bi->tag == BI_VME_TYPE) + if (be16_to_cpu(bi->tag) == BI_VME_TYPE) return 0; else return 1; diff --git a/arch/m68k/hp300/config.c b/arch/m68k/hp300/config.c index 892d0794fc0b..2e5a787ea11b 100644 --- a/arch/m68k/hp300/config.c +++ b/arch/m68k/hp300/config.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include /* readb() and writeb() */ @@ -71,15 +72,15 @@ extern int hp300_setup_serial_console(void) __init; int __init hp300_parse_bootinfo(const struct bi_record *record) { int unknown = 0; - const unsigned long *data = record->data; + const void *data = record->data; - switch (record->tag) { + switch (be16_to_cpu(record->tag)) { case BI_HP300_MODEL: - hp300_model = *data; + hp300_model = be32_to_cpup(data); break; case BI_HP300_UART_SCODE: - hp300_uart_scode = *data; + hp300_uart_scode = be32_to_cpup(data); break; case BI_HP300_UART_ADDR: diff --git a/arch/m68k/include/uapi/asm/bootinfo-amiga.h b/arch/m68k/include/uapi/asm/bootinfo-amiga.h index 28b6da07874a..daad3c58d2da 100644 --- a/arch/m68k/include/uapi/asm/bootinfo-amiga.h +++ b/arch/m68k/include/uapi/asm/bootinfo-amiga.h @@ -10,15 +10,15 @@ * Amiga-specific tags */ -#define BI_AMIGA_MODEL 0x8000 /* model (u_long) */ +#define BI_AMIGA_MODEL 0x8000 /* model (__be32) */ #define BI_AMIGA_AUTOCON 0x8001 /* AutoConfig device */ /* (AmigaOS struct ConfigDev) */ -#define BI_AMIGA_CHIP_SIZE 0x8002 /* size of Chip RAM (u_long) */ -#define BI_AMIGA_VBLANK 0x8003 /* VBLANK frequency (u_char) */ -#define BI_AMIGA_PSFREQ 0x8004 /* power supply frequency (u_char) */ -#define BI_AMIGA_ECLOCK 0x8005 /* EClock frequency (u_long) */ -#define BI_AMIGA_CHIPSET 0x8006 /* native chipset present (u_long) */ -#define BI_AMIGA_SERPER 0x8007 /* serial port period (u_short) */ +#define BI_AMIGA_CHIP_SIZE 0x8002 /* size of Chip RAM (__be32) */ +#define BI_AMIGA_VBLANK 0x8003 /* VBLANK frequency (__u8) */ +#define BI_AMIGA_PSFREQ 0x8004 /* power supply frequency (__u8) */ +#define BI_AMIGA_ECLOCK 0x8005 /* EClock frequency (__be32) */ +#define BI_AMIGA_CHIPSET 0x8006 /* native chipset present (__be32) */ +#define BI_AMIGA_SERPER 0x8007 /* serial port period (__be16) */ /* diff --git a/arch/m68k/include/uapi/asm/bootinfo-apollo.h b/arch/m68k/include/uapi/asm/bootinfo-apollo.h index 69923697e131..a93e0af1c6fe 100644 --- a/arch/m68k/include/uapi/asm/bootinfo-apollo.h +++ b/arch/m68k/include/uapi/asm/bootinfo-apollo.h @@ -10,7 +10,7 @@ * Apollo-specific tags */ -#define BI_APOLLO_MODEL 0x8000 /* model (u_long) */ +#define BI_APOLLO_MODEL 0x8000 /* model (__be32) */ /* diff --git a/arch/m68k/include/uapi/asm/bootinfo-atari.h b/arch/m68k/include/uapi/asm/bootinfo-atari.h index cca0a83fc0e5..a817854049bb 100644 --- a/arch/m68k/include/uapi/asm/bootinfo-atari.h +++ b/arch/m68k/include/uapi/asm/bootinfo-atari.h @@ -10,8 +10,8 @@ * Atari-specific tags */ -#define BI_ATARI_MCH_COOKIE 0x8000 /* _MCH cookie from TOS (u_long) */ -#define BI_ATARI_MCH_TYPE 0x8001 /* special machine type (u_long) */ +#define BI_ATARI_MCH_COOKIE 0x8000 /* _MCH cookie from TOS (__be32) */ +#define BI_ATARI_MCH_TYPE 0x8001 /* special machine type (__be32) */ /* diff --git a/arch/m68k/include/uapi/asm/bootinfo-hp300.h b/arch/m68k/include/uapi/asm/bootinfo-hp300.h index 08530e12d4c3..c90cb71ed89a 100644 --- a/arch/m68k/include/uapi/asm/bootinfo-hp300.h +++ b/arch/m68k/include/uapi/asm/bootinfo-hp300.h @@ -10,9 +10,9 @@ * HP9000/300-specific tags */ -#define BI_HP300_MODEL 0x8000 /* model (u_long) */ -#define BI_HP300_UART_SCODE 0x8001 /* UART select code (u_long) */ -#define BI_HP300_UART_ADDR 0x8002 /* phys. addr of UART (u_long) */ +#define BI_HP300_MODEL 0x8000 /* model (__be32) */ +#define BI_HP300_UART_SCODE 0x8001 /* UART select code (__be32) */ +#define BI_HP300_UART_ADDR 0x8002 /* phys. addr of UART (__be32) */ /* diff --git a/arch/m68k/include/uapi/asm/bootinfo-mac.h b/arch/m68k/include/uapi/asm/bootinfo-mac.h index 971046f49bb9..b44ff73898a9 100644 --- a/arch/m68k/include/uapi/asm/bootinfo-mac.h +++ b/arch/m68k/include/uapi/asm/bootinfo-mac.h @@ -7,7 +7,7 @@ /* - * Macintosh-specific tags (all u_long) + * Macintosh-specific tags (all __be32) */ #define BI_MAC_MODEL 0x8000 /* Mac Gestalt ID (model type) */ diff --git a/arch/m68k/include/uapi/asm/bootinfo-vme.h b/arch/m68k/include/uapi/asm/bootinfo-vme.h index 13ba5e19fe24..a135eb41d672 100644 --- a/arch/m68k/include/uapi/asm/bootinfo-vme.h +++ b/arch/m68k/include/uapi/asm/bootinfo-vme.h @@ -6,11 +6,14 @@ #define _UAPI_ASM_M68K_BOOTINFO_VME_H +#include + + /* * VME-specific tags */ -#define BI_VME_TYPE 0x8000 /* VME sub-architecture (u_long) */ +#define BI_VME_TYPE 0x8000 /* VME sub-architecture (__be32) */ #define BI_VME_BRDINFO 0x8001 /* VME board information (struct) */ @@ -43,13 +46,13 @@ typedef struct { char bdid[4]; - u_char rev, mth, day, yr; - u_short size, reserved; - u_short brdno; + __u8 rev, mth, day, yr; + __be16 size, reserved; + __be16 brdno; char brdsuffix[2]; - u_long options; - u_short clun, dlun, ctype, dnum; - u_long option2; + __be32 options; + __be16 clun, dlun, ctype, dnum; + __be32 option2; } t_bdid, *p_bdid; #endif /* __ASSEMBLY__ */ diff --git a/arch/m68k/include/uapi/asm/bootinfo.h b/arch/m68k/include/uapi/asm/bootinfo.h index 9d64599d7bf5..cdeb26a015b0 100644 --- a/arch/m68k/include/uapi/asm/bootinfo.h +++ b/arch/m68k/include/uapi/asm/bootinfo.h @@ -12,6 +12,9 @@ #define _UAPI_ASM_M68K_BOOTINFO_H +#include + + #ifndef __ASSEMBLY__ /* @@ -28,15 +31,15 @@ */ struct bi_record { - unsigned short tag; /* tag ID */ - unsigned short size; /* size of record (in bytes) */ - unsigned long data[0]; /* data */ + __be16 tag; /* tag ID */ + __be16 size; /* size of record (in bytes) */ + __be32 data[0]; /* data */ }; struct mem_info { - unsigned long addr; /* physical address of memory chunk */ - unsigned long size; /* length of memory chunk (in bytes) */ + __be32 addr; /* physical address of memory chunk */ + __be32 size; /* length of memory chunk (in bytes) */ }; #endif /* __ASSEMBLY__ */ @@ -50,10 +53,10 @@ struct mem_info { */ #define BI_LAST 0x0000 /* last record (sentinel) */ -#define BI_MACHTYPE 0x0001 /* machine type (u_long) */ -#define BI_CPUTYPE 0x0002 /* cpu type (u_long) */ -#define BI_FPUTYPE 0x0003 /* fpu type (u_long) */ -#define BI_MMUTYPE 0x0004 /* mmu type (u_long) */ +#define BI_MACHTYPE 0x0001 /* machine type (__be32) */ +#define BI_CPUTYPE 0x0002 /* cpu type (__be32) */ +#define BI_FPUTYPE 0x0003 /* fpu type (__be32) */ +#define BI_MMUTYPE 0x0004 /* mmu type (__be32) */ #define BI_MEMCHUNK 0x0005 /* memory chunk address and size */ /* (struct mem_info) */ #define BI_RAMDISK 0x0006 /* ramdisk address and size */ @@ -157,11 +160,11 @@ struct mem_info { #ifndef __ASSEMBLY__ struct bootversion { - unsigned short branch; - unsigned long magic; + __be16 branch; + __be32 magic; struct { - unsigned long machtype; - unsigned long version; + __be32 machtype; + __be32 version; } machversions[0]; } __packed; diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c index cff9845708c0..0191485d7b7d 100644 --- a/arch/m68k/kernel/setup_mm.c +++ b/arch/m68k/kernel/setup_mm.c @@ -26,6 +26,7 @@ #include #include +#include #include #include #include @@ -143,11 +144,14 @@ extern void paging_init(void); static void __init m68k_parse_bootinfo(const struct bi_record *record) { - while (record->tag != BI_LAST) { - int unknown = 0; - const unsigned long *data = record->data; + uint16_t tag; - switch (record->tag) { + while ((tag = be16_to_cpu(record->tag)) != BI_LAST) { + int unknown = 0; + const void *data = record->data; + uint16_t size = be16_to_cpu(record->size); + + switch (tag) { case BI_MACHTYPE: case BI_CPUTYPE: case BI_FPUTYPE: @@ -157,8 +161,11 @@ static void __init m68k_parse_bootinfo(const struct bi_record *record) case BI_MEMCHUNK: if (m68k_num_memory < NUM_MEMINFO) { - m68k_memory[m68k_num_memory].addr = data[0]; - m68k_memory[m68k_num_memory].size = data[1]; + const struct mem_info *m = data; + m68k_memory[m68k_num_memory].addr = + be32_to_cpu(m->addr); + m68k_memory[m68k_num_memory].size = + be32_to_cpu(m->size); m68k_num_memory++; } else pr_warn("%s: too many memory chunks\n", @@ -166,12 +173,15 @@ static void __init m68k_parse_bootinfo(const struct bi_record *record) break; case BI_RAMDISK: - m68k_ramdisk.addr = data[0]; - m68k_ramdisk.size = data[1]; + { + const struct mem_info *m = data; + m68k_ramdisk.addr = be32_to_cpu(m->addr); + m68k_ramdisk.size = be32_to_cpu(m->size); + } break; case BI_COMMAND_LINE: - strlcpy(m68k_command_line, (const char *)data, + strlcpy(m68k_command_line, data, sizeof(m68k_command_line)); break; @@ -199,9 +209,8 @@ static void __init m68k_parse_bootinfo(const struct bi_record *record) } if (unknown) pr_warn("%s: unknown tag 0x%04x ignored\n", __func__, - record->tag); - record = (struct bi_record *)((unsigned long)record + - record->size); + tag); + record = (struct bi_record *)((unsigned long)record + size); } m68k_realnum_memory = m68k_num_memory; diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c index e48069da04ed..982c3fe73c4a 100644 --- a/arch/m68k/mac/config.c +++ b/arch/m68k/mac/config.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -107,45 +108,46 @@ static void __init mac_sched_init(irq_handler_t vector) int __init mac_parse_bootinfo(const struct bi_record *record) { int unknown = 0; - const u_long *data = record->data; + const void *data = record->data; - switch (record->tag) { + switch (be16_to_cpu(record->tag)) { case BI_MAC_MODEL: - mac_bi_data.id = *data; + mac_bi_data.id = be32_to_cpup(data); break; case BI_MAC_VADDR: - mac_bi_data.videoaddr = *data; + mac_bi_data.videoaddr = be32_to_cpup(data); break; case BI_MAC_VDEPTH: - mac_bi_data.videodepth = *data; + mac_bi_data.videodepth = be32_to_cpup(data); break; case BI_MAC_VROW: - mac_bi_data.videorow = *data; + mac_bi_data.videorow = be32_to_cpup(data); break; case BI_MAC_VDIM: - mac_bi_data.dimensions = *data; + mac_bi_data.dimensions = be32_to_cpup(data); break; case BI_MAC_VLOGICAL: - mac_bi_data.videological = VIDEOMEMBASE + (*data & ~VIDEOMEMMASK); - mac_orig_videoaddr = *data; + mac_orig_videoaddr = be32_to_cpup(data); + mac_bi_data.videological = + VIDEOMEMBASE + (mac_orig_videoaddr & ~VIDEOMEMMASK); break; case BI_MAC_SCCBASE: - mac_bi_data.sccbase = *data; + mac_bi_data.sccbase = be32_to_cpup(data); break; case BI_MAC_BTIME: - mac_bi_data.boottime = *data; + mac_bi_data.boottime = be32_to_cpup(data); break; case BI_MAC_GMTBIAS: - mac_bi_data.gmtbias = *data; + mac_bi_data.gmtbias = be32_to_cpup(data); break; case BI_MAC_MEMSIZE: - mac_bi_data.memsize = *data; + mac_bi_data.memsize = be32_to_cpup(data); break; case BI_MAC_CPUID: - mac_bi_data.cpuid = *data; + mac_bi_data.cpuid = be32_to_cpup(data); break; case BI_MAC_ROMBASE: - mac_bi_data.rombase = *data; + mac_bi_data.rombase = be32_to_cpup(data); break; default: unknown = 1; diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c index 3a1d47564e52..1bb3ce6634d3 100644 --- a/arch/m68k/mvme147/config.c +++ b/arch/m68k/mvme147/config.c @@ -27,6 +27,7 @@ #include #include +#include #include #include #include @@ -54,7 +55,8 @@ irq_handler_t tick_handler; int __init mvme147_parse_bootinfo(const struct bi_record *bi) { - if (bi->tag == BI_VME_TYPE || bi->tag == BI_VME_BRDINFO) + uint16_t tag = be16_to_cpu(bi->tag); + if (tag == BI_VME_TYPE || tag == BI_VME_BRDINFO) return 0; else return 1; diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c index e05994fd048c..eab7d342757e 100644 --- a/arch/m68k/mvme16x/config.c +++ b/arch/m68k/mvme16x/config.c @@ -30,6 +30,7 @@ #include #include +#include #include #include #include @@ -63,7 +64,8 @@ EXPORT_SYMBOL(mvme16x_config); int __init mvme16x_parse_bootinfo(const struct bi_record *bi) { - if (bi->tag == BI_VME_TYPE || bi->tag == BI_VME_BRDINFO) + uint16_t tag = be16_to_cpu(bi->tag); + if (tag == BI_VME_TYPE || tag == BI_VME_BRDINFO) return 0; else return 1; @@ -88,15 +90,15 @@ static void mvme16x_get_model(char *model) suf[3] = '\0'; suf[0] = suf[1] ? '-' : '\0'; - sprintf(model, "Motorola MVME%x%s", p->brdno, suf); + sprintf(model, "Motorola MVME%x%s", be16_to_cpu(p->brdno), suf); } static void mvme16x_get_hardware_list(struct seq_file *m) { - p_bdid p = &mvme_bdid; + uint16_t brdno = be16_to_cpu(mvme_bdid.brdno); - if (p->brdno == 0x0162 || p->brdno == 0x0172) + if (brdno == 0x0162 || brdno == 0x0172) { unsigned char rev = *(unsigned char *)MVME162_VERSION_REG; @@ -286,6 +288,7 @@ void __init config_mvme16x(void) { p_bdid p = &mvme_bdid; char id[40]; + uint16_t brdno = be16_to_cpu(p->brdno); mach_max_dma_address = 0xffffffff; mach_sched_init = mvme16x_sched_init; @@ -307,18 +310,18 @@ void __init config_mvme16x(void) } /* Board type is only set by newer versions of vmelilo/tftplilo */ if (vme_brdtype == 0) - vme_brdtype = p->brdno; + vme_brdtype = brdno; mvme16x_get_model(id); printk ("\nBRD_ID: %s BUG %x.%x %02x/%02x/%02x\n", id, p->rev>>4, p->rev&0xf, p->yr, p->mth, p->day); - if (p->brdno == 0x0162 || p->brdno == 0x172) + if (brdno == 0x0162 || brdno == 0x172) { unsigned char rev = *(unsigned char *)MVME162_VERSION_REG; mvme16x_config = rev | MVME16x_CONFIG_GOT_SCCA; - printk ("MVME%x Hardware status:\n", p->brdno); + printk ("MVME%x Hardware status:\n", brdno); printk (" CPU Type 68%s040\n", rev & MVME16x_CONFIG_GOT_FPU ? "" : "LC"); printk (" CPU clock %dMHz\n", @@ -348,12 +351,12 @@ void __init config_mvme16x(void) static irqreturn_t mvme16x_abort_int (int irq, void *dev_id) { - p_bdid p = &mvme_bdid; unsigned long *new = (unsigned long *)vectors; unsigned long *old = (unsigned long *)0xffe00000; volatile unsigned char uc, *ucp; + uint16_t brdno = be16_to_cpu(mvme_bdid.brdno); - if (p->brdno == 0x0162 || p->brdno == 0x172) + if (brdno == 0x0162 || brdno == 0x172) { ucp = (volatile unsigned char *)0xfff42043; uc = *ucp | 8; @@ -367,7 +370,7 @@ static irqreturn_t mvme16x_abort_int (int irq, void *dev_id) *(new+9) = *(old+9); /* Trace */ *(new+47) = *(old+47); /* Trap #15 */ - if (p->brdno == 0x0162 || p->brdno == 0x172) + if (brdno == 0x0162 || brdno == 0x172) *(new+0x5e) = *(old+0x5e); /* ABORT switch */ else *(new+0x6e) = *(old+0x6e); /* ABORT switch */ @@ -382,7 +385,7 @@ static irqreturn_t mvme16x_timer_int (int irq, void *dev_id) void mvme16x_sched_init (irq_handler_t timer_routine) { - p_bdid p = &mvme_bdid; + uint16_t brdno = be16_to_cpu(mvme_bdid.brdno); int irq; tick_handler = timer_routine; @@ -395,7 +398,7 @@ void mvme16x_sched_init (irq_handler_t timer_routine) "timer", mvme16x_timer_int)) panic ("Couldn't register timer int"); - if (p->brdno == 0x0162 || p->brdno == 0x172) + if (brdno == 0x0162 || brdno == 0x172) irq = MVME162_IRQ_ABORT; else irq = MVME167_IRQ_ABORT; From 371001e502e8cd3543f7b9907d398a112939dff7 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sat, 5 Oct 2013 21:14:22 +0200 Subject: [PATCH 065/981] m68k: Remove superfluous inclusions of Signed-off-by: Geert Uytterhoeven --- arch/m68k/mac/iop.c | 1 - arch/m68k/mac/misc.c | 1 - arch/m68k/mac/oss.c | 1 - arch/m68k/mac/psc.c | 1 - arch/m68k/mac/via.c | 1 - arch/m68k/sun3x/prom.c | 1 - drivers/net/ethernet/natsemi/macsonic.c | 1 - drivers/video/macfb.c | 1 - drivers/video/valkyriefb.c | 1 - 9 files changed, 9 deletions(-) diff --git a/arch/m68k/mac/iop.c b/arch/m68k/mac/iop.c index ed0d9b0473ea..4d2adfb32a2a 100644 --- a/arch/m68k/mac/iop.c +++ b/arch/m68k/mac/iop.c @@ -111,7 +111,6 @@ #include #include -#include #include #include #include diff --git a/arch/m68k/mac/misc.c b/arch/m68k/mac/misc.c index 3d02a3c3a93a..707b61aea203 100644 --- a/arch/m68k/mac/misc.c +++ b/arch/m68k/mac/misc.c @@ -25,7 +25,6 @@ #include #include -#include #include /* Offset between Unix time (1970-based) and Mac time (1904-based) */ diff --git a/arch/m68k/mac/oss.c b/arch/m68k/mac/oss.c index 6c4c882c126e..54037125ebf8 100644 --- a/arch/m68k/mac/oss.c +++ b/arch/m68k/mac/oss.c @@ -21,7 +21,6 @@ #include #include -#include #include #include #include diff --git a/arch/m68k/mac/psc.c b/arch/m68k/mac/psc.c index cc9f3f097be7..835fa04511c8 100644 --- a/arch/m68k/mac/psc.c +++ b/arch/m68k/mac/psc.c @@ -21,7 +21,6 @@ #include #include -#include #include #include #include diff --git a/arch/m68k/mac/via.c b/arch/m68k/mac/via.c index 5d1458bb871b..e198dec868e4 100644 --- a/arch/m68k/mac/via.c +++ b/arch/m68k/mac/via.c @@ -30,7 +30,6 @@ #include #include -#include #include #include #include diff --git a/arch/m68k/sun3x/prom.c b/arch/m68k/sun3x/prom.c index a7b7e818d627..0898c3f81508 100644 --- a/arch/m68k/sun3x/prom.c +++ b/arch/m68k/sun3x/prom.c @@ -10,7 +10,6 @@ #include #include -#include #include #include #include diff --git a/drivers/net/ethernet/natsemi/macsonic.c b/drivers/net/ethernet/natsemi/macsonic.c index 346a4e025c34..04b3ec1352f1 100644 --- a/drivers/net/ethernet/natsemi/macsonic.c +++ b/drivers/net/ethernet/natsemi/macsonic.c @@ -52,7 +52,6 @@ #include #include -#include #include #include #include diff --git a/drivers/video/macfb.c b/drivers/video/macfb.c index 5bd2eb8d4f39..cda7587cbc86 100644 --- a/drivers/video/macfb.c +++ b/drivers/video/macfb.c @@ -34,7 +34,6 @@ #include #include -#include #include #include diff --git a/drivers/video/valkyriefb.c b/drivers/video/valkyriefb.c index e287ebc47817..97cb9bd1d1dd 100644 --- a/drivers/video/valkyriefb.c +++ b/drivers/video/valkyriefb.c @@ -56,7 +56,6 @@ #include #include #ifdef CONFIG_MAC -#include #include #else #include From 7a15dd5c4b9d916cb1734dfd79dbdd5773772abb Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 18 Oct 2013 09:05:22 +0200 Subject: [PATCH 066/981] m68k/atari: Call paging_init() before nf_init() nf_init() uses virt_to_phys(), which depends on m68k_memoffset being set and module_fixup() having been called, but this is only done in paging_init(). Hence call paging_init() before nf_init(). This went unnoticed, as virt_to_phys() is a no-op on Atari, unless you start fiddling with the memory blocks in the bootinfo manually. Signed-off-by: Geert Uytterhoeven --- arch/m68k/kernel/setup_mm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c index 0191485d7b7d..28abca421974 100644 --- a/arch/m68k/kernel/setup_mm.c +++ b/arch/m68k/kernel/setup_mm.c @@ -346,12 +346,12 @@ void __init setup_arch(char **cmdline_p) panic("No configuration setup"); } + paging_init(); + #ifdef CONFIG_NATFEAT nf_init(); #endif - paging_init(); - #ifndef CONFIG_SUN3 for (i = 1; i < m68k_num_memory; i++) free_bootmem_node(NODE_DATA(i), m68k_memory[i].addr, From 017cecee99d897ae426f3948e7b970eb4f95e1f4 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 18 Oct 2013 13:10:08 +0200 Subject: [PATCH 067/981] m68k: Add infrastructure for machine-specific random_get_entropy() On m68k, get_cycles() (the default implementation for random_get_entropy()) always returns zero, providing no entropy for the random driver. Add a hook where platforms can provide their own implementation, and wire it up in the infrastructure provided by commit 61875f30daf60305712e25b209ef41ced2635bad ("random: allow architectures to optionally define random_get_entropy()"). Signed-off-by: Geert Uytterhoeven --- arch/m68k/include/asm/timex.h | 10 ++++++++++ arch/m68k/kernel/time.c | 4 ++++ 2 files changed, 14 insertions(+) diff --git a/arch/m68k/include/asm/timex.h b/arch/m68k/include/asm/timex.h index 6759dad954f6..efc1f4892357 100644 --- a/arch/m68k/include/asm/timex.h +++ b/arch/m68k/include/asm/timex.h @@ -28,4 +28,14 @@ static inline cycles_t get_cycles(void) return 0; } +extern unsigned long (*mach_random_get_entropy)(void); + +static inline unsigned long random_get_entropy(void) +{ + if (mach_random_get_entropy) + return mach_random_get_entropy(); + return 0; +} +#define random_get_entropy random_get_entropy + #endif diff --git a/arch/m68k/kernel/time.c b/arch/m68k/kernel/time.c index 7eb9792009f8..958f1adb9d0c 100644 --- a/arch/m68k/kernel/time.c +++ b/arch/m68k/kernel/time.c @@ -28,6 +28,10 @@ #include #include + +unsigned long (*mach_random_get_entropy)(void); + + /* * timer_interrupt() needs to keep up the real-time clock, * as well as call the "xtime_update()" routine every clocktick From 50190edb2a56bb9efa3e2f8ca32482dbd3e42412 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 18 Oct 2013 13:16:06 +0200 Subject: [PATCH 068/981] m68k/amiga: Provide mach_random_get_entropy() Use the beam position registers, which provide at least 17 bits of data changing at 1.79 MHz. Signed-off-by: Geert Uytterhoeven --- arch/m68k/amiga/config.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c index 06920e85fac9..c425fa6c2795 100644 --- a/arch/m68k/amiga/config.c +++ b/arch/m68k/amiga/config.c @@ -360,6 +360,14 @@ static void __init amiga_identify(void) #undef AMIGAHW_ANNOUNCE } + +static unsigned long amiga_random_get_entropy(void) +{ + /* VPOSR/VHPOSR provide at least 17 bits of data changing at 1.79 MHz */ + return *(unsigned long *)&amiga_custom.vposr; +} + + /* * Setup the Amiga configuration info */ @@ -397,6 +405,8 @@ void __init config_amiga(void) mach_heartbeat = amiga_heartbeat; #endif + mach_random_get_entropy = amiga_random_get_entropy; + /* Fill in the clock value (based on the 700 kHz E-Clock) */ amiga_colorclock = 5*amiga_eclock; /* 3.5 MHz */ From 5800dc3cff87c3a1548382298bb16e1fb4ec7e32 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 25 Nov 2013 23:23:04 +0000 Subject: [PATCH 069/981] panic: Make panic_timeout configurable The panic_timeout value can be set via the command line option 'panic=x', or via /proc/sys/kernel/panic, however that is not sufficient when the panic occurs before we are able to set up these values. Thus, add a CONFIG_PANIC_TIMEOUT so that we can set the desired value from the .config. The default panic_timeout value continues to be 0 - wait forever. Also adds set_arch_panic_timeout(new_timeout, arch_default_timeout), which is intended to be used by arches in arch_setup(). The idea being that the new_timeout is only set if the user hasn't changed from the arch_default_timeout. Signed-off-by: Jason Baron Cc: benh@kernel.crashing.org Cc: paulus@samba.org Cc: ralf@linux-mips.org Cc: mpe@ellerman.id.au Cc: felipe.contreras@gmail.com Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1a1674daec27c534df409697025ac568ebcee91e.1385418410.git.jbaron@akamai.com Signed-off-by: Ingo Molnar --- include/linux/kernel.h | 9 +++++++++ kernel/panic.c | 2 +- lib/Kconfig.debug | 9 +++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index d4e98d13eff4..2ac02772a86e 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -393,6 +393,15 @@ extern int panic_on_oops; extern int panic_on_unrecovered_nmi; extern int panic_on_io_nmi; extern int sysctl_panic_on_stackoverflow; +/* + * Only to be used by arch init code. If the user over-wrote the default + * CONFIG_PANIC_TIMEOUT, honor it. + */ +static inline void set_arch_panic_timeout(int timeout, int arch_default_timeout) +{ + if (panic_timeout == arch_default_timeout) + panic_timeout = timeout; +} extern const char *print_tainted(void); enum lockdep_ok { LOCKDEP_STILL_OK, diff --git a/kernel/panic.c b/kernel/panic.c index c00b4ceb39e8..6d6300375090 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -33,7 +33,7 @@ static int pause_on_oops; static int pause_on_oops_flag; static DEFINE_SPINLOCK(pause_on_oops_lock); -int panic_timeout; +int panic_timeout = CONFIG_PANIC_TIMEOUT; EXPORT_SYMBOL_GPL(panic_timeout); ATOMIC_NOTIFIER_HEAD(panic_notifier_list); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index db25707aa41b..6982094a7e74 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -761,6 +761,15 @@ config PANIC_ON_OOPS_VALUE default 0 if !PANIC_ON_OOPS default 1 if PANIC_ON_OOPS +config PANIC_TIMEOUT + int "panic timeout" + default 0 + help + Set the timeout value (in seconds) until a reboot occurs when the + the kernel panics. If n = 0, then we wait forever. A timeout + value n > 0 will wait n seconds before rebooting, while a timeout + value n < 0 will reboot immediately. + config SCHED_DEBUG bool "Collect scheduler debugging info" depends on DEBUG_KERNEL && PROC_FS From 7972e966b032939afe63d8db22975240879dfd2a Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Mon, 25 Nov 2013 23:23:07 +0000 Subject: [PATCH 070/981] MIPS: Remove panic_timeout settings Now that we have a CONFIG_PANIC_TIMEOUT=x setting, remove the mips settings. The default is 0, which means don't reboot on panic. Signed-off-by: Ralf Baechle Acked-by: Shinya Kuribayashi Signed-off-by: Jason Baron Cc: benh@kernel.crashing.org Cc: paulus@samba.org Cc: mpe@ellerman.id.au Cc: felipe.contreras@gmail.com Cc: linux-mips@linux-mips.org Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/d19dc75fca343ec5d9ada75a1400f57330021976.1385418410.git.jbaron@akamai.com Signed-off-by: Ingo Molnar --- arch/mips/ar7/setup.c | 1 - arch/mips/emma/markeins/setup.c | 3 --- arch/mips/netlogic/xlp/setup.c | 1 - arch/mips/netlogic/xlr/setup.c | 1 - arch/mips/sibyte/swarm/setup.c | 2 -- 5 files changed, 8 deletions(-) diff --git a/arch/mips/ar7/setup.c b/arch/mips/ar7/setup.c index 9a357fffcfbe..820b7a313d9b 100644 --- a/arch/mips/ar7/setup.c +++ b/arch/mips/ar7/setup.c @@ -92,7 +92,6 @@ void __init plat_mem_setup(void) _machine_restart = ar7_machine_restart; _machine_halt = ar7_machine_halt; pm_power_off = ar7_machine_power_off; - panic_timeout = 3; io_base = (unsigned long)ioremap(AR7_REGS_BASE, 0x10000); if (!io_base) diff --git a/arch/mips/emma/markeins/setup.c b/arch/mips/emma/markeins/setup.c index d71005835c00..9100122e5cef 100644 --- a/arch/mips/emma/markeins/setup.c +++ b/arch/mips/emma/markeins/setup.c @@ -111,9 +111,6 @@ void __init plat_mem_setup(void) iomem_resource.start = EMMA2RH_IO_BASE; iomem_resource.end = EMMA2RH_ROM_BASE - 1; - /* Reboot on panic */ - panic_timeout = 180; - markeins_sio_setup(); } diff --git a/arch/mips/netlogic/xlp/setup.c b/arch/mips/netlogic/xlp/setup.c index 6d981bb337ec..54e75c77184b 100644 --- a/arch/mips/netlogic/xlp/setup.c +++ b/arch/mips/netlogic/xlp/setup.c @@ -92,7 +92,6 @@ static void __init xlp_init_mem_from_bars(void) void __init plat_mem_setup(void) { - panic_timeout = 5; _machine_restart = (void (*)(char *))nlm_linux_exit; _machine_halt = nlm_linux_exit; pm_power_off = nlm_linux_exit; diff --git a/arch/mips/netlogic/xlr/setup.c b/arch/mips/netlogic/xlr/setup.c index 214d123b79fa..921be5f77797 100644 --- a/arch/mips/netlogic/xlr/setup.c +++ b/arch/mips/netlogic/xlr/setup.c @@ -92,7 +92,6 @@ static void nlm_linux_exit(void) void __init plat_mem_setup(void) { - panic_timeout = 5; _machine_restart = (void (*)(char *))nlm_linux_exit; _machine_halt = nlm_linux_exit; pm_power_off = nlm_linux_exit; diff --git a/arch/mips/sibyte/swarm/setup.c b/arch/mips/sibyte/swarm/setup.c index 41707a245dea..3462c831d0ea 100644 --- a/arch/mips/sibyte/swarm/setup.c +++ b/arch/mips/sibyte/swarm/setup.c @@ -134,8 +134,6 @@ void __init plat_mem_setup(void) #error invalid SiByte board configuration #endif - panic_timeout = 5; /* For debug. */ - board_be_handler = swarm_be_handler; if (xicor_probe()) From b71d47c14fba6270c0b5a0d56639bf042017025b Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 25 Nov 2013 23:23:11 +0000 Subject: [PATCH 071/981] powerpc: Clean up panic_timeout usage Default CONFIG_PANIC_TIMEOUT to 180 seconds on powerpc. The pSeries continue to set the timeout to 10 seconds at run-time. Thus, there's a small window where we don't have the correct value on pSeries, but if this is only run-time discoverable we don't have a better option. In any case, if the user changes the default setting of 180 seconds, we honor that user setting. Signed-off-by: Jason Baron Cc: benh@kernel.crashing.org Cc: paulus@samba.org Cc: ralf@linux-mips.org Cc: mpe@ellerman.id.au Cc: felipe.contreras@gmail.com Cc: linuxppc-dev@lists.ozlabs.org Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/705bbe0f70fb20759151642ba0176a6414ec9f7a.1385418410.git.jbaron@akamai.com Signed-off-by: Ingo Molnar --- arch/powerpc/Kconfig | 4 ++++ arch/powerpc/include/asm/setup.h | 1 + arch/powerpc/kernel/setup_32.c | 3 --- arch/powerpc/kernel/setup_64.c | 3 --- arch/powerpc/platforms/pseries/setup.c | 2 +- 5 files changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index b44b52c0a8f0..b2be8e8cb5c7 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -147,6 +147,10 @@ config EARLY_PRINTK bool default y +config PANIC_TIMEOUT + int + default 180 + config COMPAT bool default y if PPC64 diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 703a8412dac2..11ba86e17631 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -26,6 +26,7 @@ extern void reloc_got2(unsigned long); void check_for_initrd(void); void do_init_bootmem(void); void setup_panic(void); +#define ARCH_PANIC_TIMEOUT 180 #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index b903dc5cf944..2b0da27eaee4 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -296,9 +296,6 @@ void __init setup_arch(char **cmdline_p) if (cpu_has_feature(CPU_FTR_UNIFIED_ID_CACHE)) ucache_bsize = icache_bsize = dcache_bsize; - /* reboot on panic */ - panic_timeout = 180; - if (ppc_md.panic) setup_panic(); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 4085aaa9478f..856dd4e99bfe 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -588,9 +588,6 @@ void __init setup_arch(char **cmdline_p) dcache_bsize = ppc64_caches.dline_size; icache_bsize = ppc64_caches.iline_size; - /* reboot on panic */ - panic_timeout = 180; - if (ppc_md.panic) setup_panic(); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index c1f190858701..6f76ae417f47 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -470,7 +470,7 @@ static long pseries_little_endian_exceptions(void) static void __init pSeries_setup_arch(void) { - panic_timeout = 10; + set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); /* Discover PIC type and setup ppc_md accordingly */ pseries_discover_pic(); From 39e24d8ffbb6aa90222527dbd5991ec49fbbf323 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Sat, 23 Nov 2013 18:38:01 +0900 Subject: [PATCH 072/981] sched: Fix a trivial syntax misuse Use if statement instead of while loop. Signed-off-by: Shigeru Yoshida Cc: Peter Zijlstra Cc: Jiri Kosina Link: http://lkml.kernel.org/r/20131123.183801.769652906919404319.shigeru.yoshida@gmail.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c1808606ee5f..687985b0284e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3654,7 +3654,7 @@ again: } double_rq_lock(rq, p_rq); - while (task_rq(p) != p_rq) { + if (task_rq(p) != p_rq) { double_rq_unlock(rq, p_rq); goto again; } From f48cfddc6729ef133933062320039808bafa6f45 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 8 Nov 2013 16:31:29 -0800 Subject: [PATCH 073/981] vfs: In d_path don't call d_dname on a mount point Aditya Kali (adityakali@google.com) wrote: > Commit bf056bfa80596a5d14b26b17276a56a0dcb080e5: > "proc: Fix the namespace inode permission checks." converted > the namespace files into symlinks. The same commit changed > the way namespace bind mounts appear in /proc/mounts: > $ mount --bind /proc/self/ns/ipc /mnt/ipc > Originally: > $ cat /proc/mounts | grep ipc > proc /mnt/ipc proc rw,nosuid,nodev,noexec 0 0 > > After commit bf056bfa80596a5d14b26b17276a56a0dcb080e5: > $ cat /proc/mounts | grep ipc > proc ipc:[4026531839] proc rw,nosuid,nodev,noexec 0 0 > > This breaks userspace which expects the 2nd field in > /proc/mounts to be a valid path. The symlink /proc//ns/{ipc,mnt,net,pid,user,uts} point to dentries allocated with d_alloc_pseudo that we can mount, and that have interesting names printed out with d_dname. When these files are bind mounted /proc/mounts is not currently displaying the mount point correctly because d_dname is called instead of just displaying the path where the file is mounted. Solve this by adding an explicit check to distinguish mounted pseudo inodes and unmounted pseudo inodes. Unmounted pseudo inodes always use mount of their filesstem as the mnt_root in their path making these two cases easy to distinguish. CC: stable@vger.kernel.org Acked-by: Serge Hallyn Reported-by: Aditya Kali Signed-off-by: "Eric W. Biederman" --- fs/dcache.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/dcache.c b/fs/dcache.c index 4bdb300b16e2..f7282ebf1a37 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -3061,8 +3061,13 @@ char *d_path(const struct path *path, char *buf, int buflen) * thus don't need to be hashed. They also don't need a name until a * user wants to identify the object in /proc/pid/fd/. The little hack * below allows us to generate a name for these objects on demand: + * + * Some pseudo inodes are mountable. When they are mounted + * path->dentry == path->mnt->mnt_root. In that case don't call d_dname + * and instead have d_path return the mounted path. */ - if (path->dentry->d_op && path->dentry->d_op->d_dname) + if (path->dentry->d_op && path->dentry->d_op->d_dname && + (!IS_ROOT(path->dentry) || path->dentry != path->mnt->mnt_root)) return path->dentry->d_op->d_dname(path->dentry, buf, buflen); rcu_read_lock(); From 1f7f4dde5c945f41a7abc2285be43d918029ecc5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 14 Nov 2013 21:10:16 -0800 Subject: [PATCH 074/981] fork: Allow CLONE_PARENT after setns(CLONE_NEWPID) Serge Hallyn writes: > Hi Oleg, > > commit 40a0d32d1eaffe6aac7324ca92604b6b3977eb0e : > "fork: unify and tighten up CLONE_NEWUSER/CLONE_NEWPID checks" > breaks lxc-attach in 3.12. That code forks a child which does > setns() and then does a clone(CLONE_PARENT). That way the > grandchild can be in the right namespaces (which the child was > not) and be a child of the original task, which is the monitor. > > lxc-attach in 3.11 was working fine with no side effects that I > could see. Is there a real danger in allowing CLONE_PARENT > when current->nsproxy->pidns_for_children is not our pidns, > or was this done out of an "over-abundance of caution"? Can we > safely revert that new extra check? The two fundamental things I know we can not allow are: - A shared signal queue aka CLONE_THREAD. Because we compute the pid and uid of the signal when we place it in the queue. - Changing the pid and by extention pid_namespace of an existing process. From a parents perspective there is nothing special about the pid namespace, to deny CLONE_PARENT, because the parent simply won't know or care. From the childs perspective all that is special really are shared signal queues. User mode threading with CLONE_PARENT|CLONE_VM|CLONE_SIGHAND and tasks in different pid namespaces is almost certainly going to break because it is complicated. But shared signal handlers can look at per thread information to know which pid namespace a process is in, so I don't know of any reason not to support CLONE_PARENT|CLONE_VM|CLONE_SIGHAND threads at the kernel level. It would be absolutely stupid to implement but that is a different thing. So hmm. Because it can do no harm, and because it is a regression let's remove the CLONE_PARENT check and send it stable. Cc: stable@vger.kernel.org Acked-by: Oleg Nesterov Acked-by: Andy Lutomirski Acked-by: Serge E. Hallyn Signed-off-by: "Eric W. Biederman" --- kernel/fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/fork.c b/kernel/fork.c index 728d5be9548c..f82fa2ee7581 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1171,7 +1171,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, * do not allow it to share a thread group or signal handlers or * parent with the forking task. */ - if (clone_flags & (CLONE_SIGHAND | CLONE_PARENT)) { + if (clone_flags & CLONE_SIGHAND) { if ((clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) || (task_active_pid_ns(current) != current->nsproxy->pid_ns_for_children)) From 41301ae78a99ead04ea42672a1ab72c6f44cc81d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 14 Nov 2013 21:22:25 -0800 Subject: [PATCH 075/981] vfs: Fix a regression in mounting proc Gao feng reported that commit e51db73532955dc5eaba4235e62b74b460709d5b userns: Better restrictions on when proc and sysfs can be mounted caused a regression on mounting a new instance of proc in a mount namespace created with user namespace privileges, when binfmt_misc is mounted on /proc/sys/fs/binfmt_misc. This is an unintended regression caused by the absolutely bogus empty directory check in fs_fully_visible. The check fs_fully_visible replaced didn't even bother to attempt to verify proc was fully visible and hiding proc files with any kind of mount is rare. So for now fix the userspace regression by allowing directory with nlink == 1 as /proc/sys/fs/binfmt_misc has. I will have a better patch but it is not stable material, or last minute kernel material. So it will have to wait. Cc: stable@vger.kernel.org Acked-by: Serge Hallyn Acked-by: Gao feng Tested-by: Gao feng Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index ac2ce8a766e1..be32ebccdeb1 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2886,7 +2886,7 @@ bool fs_fully_visible(struct file_system_type *type) struct inode *inode = child->mnt_mountpoint->d_inode; if (!S_ISDIR(inode->i_mode)) goto next; - if (inode->i_nlink != 2) + if (inode->i_nlink > 2) goto next; } visible = true; From 5c4853b60ca8ec3d989ce05a5e995d15c3ed52c0 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 20 Nov 2013 01:07:34 +0100 Subject: [PATCH 076/981] lockdep: Simplify a bit hardirq <-> softirq transitions Instead of saving the hardirq state on a per CPU variable, which require an explicit call before the softirq handling and some complication, just save and restore the hardirq tracing state through functions return values and parameters. It simplifies a bit the black magic that works around the fact that softirqs can be called from hardirqs while hardirqs can nest on softirqs but those two cases have very different semantics and only the latter case assume both states. Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: Linus Torvalds Cc: Andrew Morton Cc: Paul E. McKenney Link: http://lkml.kernel.org/r/1384906054-30676-1-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- kernel/softirq.c | 39 +++++++++++++++++---------------------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/kernel/softirq.c b/kernel/softirq.c index f84aa48c0e66..9a4500e4c189 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -213,40 +213,35 @@ EXPORT_SYMBOL(local_bh_enable_ip); #ifdef CONFIG_TRACE_IRQFLAGS /* - * Convoluted means of passing __do_softirq() a message through the various - * architecture execute_on_stack() bits. - * * When we run softirqs from irq_exit() and thus on the hardirq stack we need * to keep the lockdep irq context tracking as tight as possible in order to * not miss-qualify lock contexts and miss possible deadlocks. */ -static DEFINE_PER_CPU(int, softirq_from_hardirq); -static inline void lockdep_softirq_from_hardirq(void) +static inline bool lockdep_softirq_start(void) { - this_cpu_write(softirq_from_hardirq, 1); -} + bool in_hardirq = false; -static inline void lockdep_softirq_start(void) -{ - if (this_cpu_read(softirq_from_hardirq)) + if (trace_hardirq_context(current)) { + in_hardirq = true; trace_hardirq_exit(); + } + lockdep_softirq_enter(); + + return in_hardirq; } -static inline void lockdep_softirq_end(void) +static inline void lockdep_softirq_end(bool in_hardirq) { lockdep_softirq_exit(); - if (this_cpu_read(softirq_from_hardirq)) { - this_cpu_write(softirq_from_hardirq, 0); - trace_hardirq_enter(); - } -} + if (in_hardirq) + trace_hardirq_enter(); +} #else -static inline void lockdep_softirq_from_hardirq(void) { } -static inline void lockdep_softirq_start(void) { } -static inline void lockdep_softirq_end(void) { } +static inline bool lockdep_softirq_start(void) { return false; } +static inline void lockdep_softirq_end(bool in_hardirq) { } #endif asmlinkage void __do_softirq(void) @@ -255,6 +250,7 @@ asmlinkage void __do_softirq(void) unsigned long old_flags = current->flags; int max_restart = MAX_SOFTIRQ_RESTART; struct softirq_action *h; + bool in_hardirq; __u32 pending; int cpu; @@ -269,7 +265,7 @@ asmlinkage void __do_softirq(void) account_irq_enter_time(current); __local_bh_disable(_RET_IP_, SOFTIRQ_OFFSET); - lockdep_softirq_start(); + in_hardirq = lockdep_softirq_start(); cpu = smp_processor_id(); restart: @@ -316,7 +312,7 @@ restart: wakeup_softirqd(); } - lockdep_softirq_end(); + lockdep_softirq_end(in_hardirq); account_irq_exit_time(current); __local_bh_enable(SOFTIRQ_OFFSET); WARN_ON_ONCE(in_interrupt()); @@ -365,7 +361,6 @@ void irq_enter(void) static inline void invoke_softirq(void) { if (!force_irqthreads) { - lockdep_softirq_from_hardirq(); #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK /* * We can safely execute softirq on the current stack if From 71ad88efebbcde374bddf904b96f3a7fc82d45d4 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 12 Nov 2013 17:58:48 +0100 Subject: [PATCH 077/981] perf: Add active_entry list head to struct perf_event This patch adds a new field to the struct perf_event. It is intended to be used to chain events which are active (enabled). It helps in the hardware layer for PMUs which do not have actual counter restrictions, i.e., free running read-only counters. Active events are chained as opposed to being tracked via the counter they use. To save space we use a union with hlist_entry as both are mutually exclusive (suggested by Jiri Olsa). Signed-off-by: Stephane Eranian Reviewed-by: Andi Kleen Signed-off-by: Peter Zijlstra Cc: acme@redhat.com Cc: jolsa@redhat.com Cc: zheng.z.yan@intel.com Cc: bp@alien8.de Cc: maria.n.dimakopoulou@gmail.com Link: http://lkml.kernel.org/r/1384275531-10892-2-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 5 ++++- kernel/events/core.c | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2e069d1288df..8f4a70f2eca8 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -319,7 +319,10 @@ struct perf_event { */ struct list_head migrate_entry; - struct hlist_node hlist_entry; + union { + struct hlist_node hlist_entry; + struct list_head active_entry; + }; int nr_siblings; int group_flags; struct perf_event *group_leader; diff --git a/kernel/events/core.c b/kernel/events/core.c index 72348dc192c1..403b781daafb 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6655,6 +6655,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, INIT_LIST_HEAD(&event->event_entry); INIT_LIST_HEAD(&event->sibling_list); INIT_LIST_HEAD(&event->rb_entry); + INIT_LIST_HEAD(&event->active_entry); init_waitqueue_head(&event->waitq); init_irq_work(&event->pending, perf_pending_event); From 410136f5dd96b6013fe6d1011b523b1c247e1ccb Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 12 Nov 2013 17:58:49 +0100 Subject: [PATCH 078/981] tools/perf/stat: Add event unit and scale support This patch adds perf stat support for handling event units and scales as exported by the kernel. The kernel can export PMU events actual unit and scaling factor via sysfs: $ ls -1 /sys/devices/power/events/energy-* /sys/devices/power/events/energy-cores /sys/devices/power/events/energy-cores.scale /sys/devices/power/events/energy-cores.unit /sys/devices/power/events/energy-pkg /sys/devices/power/events/energy-pkg.scale /sys/devices/power/events/energy-pkg.unit $ cat /sys/devices/power/events/energy-cores.scale 2.3283064365386962890625e-10 $ cat cat /sys/devices/power/events/energy-cores.unit Joules This patch modifies the pmu event alias code to check for the presence of the .unit and .scale files to load the corresponding values. They are then used by perf stat transparently: # perf stat -a -e power/energy-pkg/,power/energy-cores/,cycles -I 1000 sleep 1000 # time counts unit events 1.000214717 3.07 Joules power/energy-pkg/ [100.00%] 1.000214717 0.53 Joules power/energy-cores/ 1.000214717 12965028 cycles [100.00%] 2.000749289 3.01 Joules power/energy-pkg/ 2.000749289 0.52 Joules power/energy-cores/ 2.000749289 15817043 cycles When the event does not have an explicit unit exported by the kernel, nothing is printed. In csv output mode, there will be an empty field. Special thanks to Jiri for providing the supporting code in the parser to trigger reading of the scale and unit files. Signed-off-by: Stephane Eranian Reviewed-by: Jiri Olsa Reviewed-by: Andi Kleen Signed-off-by: Peter Zijlstra Cc: zheng.z.yan@intel.com Cc: bp@alien8.de Cc: maria.n.dimakopoulou@gmail.com Cc: acme@redhat.com Link: http://lkml.kernel.org/r/1384275531-10892-3-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 114 ++++++++++++++++++++------- tools/perf/util/evsel.c | 2 + tools/perf/util/evsel.h | 3 + tools/perf/util/parse-events.c | 28 ++++--- tools/perf/util/pmu.c | 138 ++++++++++++++++++++++++++++++++- tools/perf/util/pmu.h | 3 +- 6 files changed, 244 insertions(+), 44 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index ee0d565f83e3..dab98b50c9fe 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -138,6 +138,7 @@ static const char *post_cmd = NULL; static bool sync_run = false; static unsigned int interval = 0; static unsigned int initial_delay = 0; +static unsigned int unit_width = 4; /* strlen("unit") */ static bool forever = false; static struct timespec ref_time; static struct cpu_map *aggr_map; @@ -461,17 +462,17 @@ static void print_interval(void) if (num_print_interval == 0 && !csv_output) { switch (aggr_mode) { case AGGR_SOCKET: - fprintf(output, "# time socket cpus counts events\n"); + fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit"); break; case AGGR_CORE: - fprintf(output, "# time core cpus counts events\n"); + fprintf(output, "# time core cpus counts %*s events\n", unit_width, "unit"); break; case AGGR_NONE: - fprintf(output, "# time CPU counts events\n"); + fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit"); break; case AGGR_GLOBAL: default: - fprintf(output, "# time counts events\n"); + fprintf(output, "# time counts %*s events\n", unit_width, "unit"); } } @@ -516,6 +517,7 @@ static int __run_perf_stat(int argc, const char **argv) unsigned long long t0, t1; struct perf_evsel *counter; struct timespec ts; + size_t l; int status = 0; const bool forks = (argc > 0); @@ -565,6 +567,10 @@ static int __run_perf_stat(int argc, const char **argv) return -1; } counter->supported = true; + + l = strlen(counter->unit); + if (l > unit_width) + unit_width = l; } if (perf_evlist__apply_filters(evsel_list)) { @@ -704,14 +710,25 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) { double msecs = avg / 1e6; - const char *fmt = csv_output ? "%.6f%s%s" : "%18.6f%s%-25s"; + const char *fmt_v, *fmt_n; char name[25]; + fmt_v = csv_output ? "%.6f%s" : "%18.6f%s"; + fmt_n = csv_output ? "%s" : "%-25s"; + aggr_printout(evsel, cpu, nr); scnprintf(name, sizeof(name), "%s%s", perf_evsel__name(evsel), csv_output ? "" : " (msec)"); - fprintf(output, fmt, msecs, csv_sep, name); + + fprintf(output, fmt_v, msecs, csv_sep); + + if (csv_output) + fprintf(output, "%s%s", evsel->unit, csv_sep); + else + fprintf(output, "%-*s%s", unit_width, evsel->unit, csv_sep); + + fprintf(output, fmt_n, name); if (evsel->cgrp) fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); @@ -908,21 +925,31 @@ static void print_ll_cache_misses(int cpu, static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) { double total, ratio = 0.0, total2; + double sc = evsel->scale; const char *fmt; - if (csv_output) - fmt = "%.0f%s%s"; - else if (big_num) - fmt = "%'18.0f%s%-25s"; - else - fmt = "%18.0f%s%-25s"; + if (csv_output) { + fmt = sc != 1.0 ? "%.2f%s" : "%.0f%s"; + } else { + if (big_num) + fmt = sc != 1.0 ? "%'18.2f%s" : "%'18.0f%s"; + else + fmt = sc != 1.0 ? "%18.2f%s" : "%18.0f%s"; + } aggr_printout(evsel, cpu, nr); if (aggr_mode == AGGR_GLOBAL) cpu = 0; - fprintf(output, fmt, avg, csv_sep, perf_evsel__name(evsel)); + fprintf(output, fmt, avg, csv_sep); + + if (evsel->unit) + fprintf(output, "%-*s%s", + csv_output ? 0 : unit_width, + evsel->unit, csv_sep); + + fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel)); if (evsel->cgrp) fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); @@ -941,7 +968,10 @@ static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) if (total && avg) { ratio = total / avg; - fprintf(output, "\n # %5.2f stalled cycles per insn", ratio); + fprintf(output, "\n"); + if (aggr_mode == AGGR_NONE) + fprintf(output, " "); + fprintf(output, " # %5.2f stalled cycles per insn", ratio); } } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) && @@ -1061,6 +1091,7 @@ static void print_aggr(char *prefix) { struct perf_evsel *counter; int cpu, cpu2, s, s2, id, nr; + double uval; u64 ena, run, val; if (!(aggr_map || aggr_get_id)) @@ -1087,11 +1118,17 @@ static void print_aggr(char *prefix) if (run == 0 || ena == 0) { aggr_printout(counter, id, nr); - fprintf(output, "%*s%s%*s", + fprintf(output, "%*s%s", csv_output ? 0 : 18, counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, - csv_sep, - csv_output ? 0 : -24, + csv_sep); + + fprintf(output, "%-*s%s", + csv_output ? 0 : unit_width, + counter->unit, csv_sep); + + fprintf(output, "%*s", + csv_output ? 0 : -25, perf_evsel__name(counter)); if (counter->cgrp) @@ -1101,11 +1138,12 @@ static void print_aggr(char *prefix) fputc('\n', output); continue; } + uval = val * counter->scale; if (nsec_counter(counter)) - nsec_printout(id, nr, counter, val); + nsec_printout(id, nr, counter, uval); else - abs_printout(id, nr, counter, val); + abs_printout(id, nr, counter, uval); if (!csv_output) { print_noise(counter, 1.0); @@ -1128,16 +1166,21 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) struct perf_stat *ps = counter->priv; double avg = avg_stats(&ps->res_stats[0]); int scaled = counter->counts->scaled; + double uval; if (prefix) fprintf(output, "%s", prefix); if (scaled == -1) { - fprintf(output, "%*s%s%*s", + fprintf(output, "%*s%s", csv_output ? 0 : 18, counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, - csv_sep, - csv_output ? 0 : -24, + csv_sep); + fprintf(output, "%-*s%s", + csv_output ? 0 : unit_width, + counter->unit, csv_sep); + fprintf(output, "%*s", + csv_output ? 0 : -25, perf_evsel__name(counter)); if (counter->cgrp) @@ -1147,10 +1190,12 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) return; } + uval = avg * counter->scale; + if (nsec_counter(counter)) - nsec_printout(-1, 0, counter, avg); + nsec_printout(-1, 0, counter, uval); else - abs_printout(-1, 0, counter, avg); + abs_printout(-1, 0, counter, uval); print_noise(counter, avg); @@ -1177,6 +1222,7 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) static void print_counter(struct perf_evsel *counter, char *prefix) { u64 ena, run, val; + double uval; int cpu; for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { @@ -1188,14 +1234,20 @@ static void print_counter(struct perf_evsel *counter, char *prefix) fprintf(output, "%s", prefix); if (run == 0 || ena == 0) { - fprintf(output, "CPU%*d%s%*s%s%*s", + fprintf(output, "CPU%*d%s%*s%s", csv_output ? 0 : -4, perf_evsel__cpus(counter)->map[cpu], csv_sep, csv_output ? 0 : 18, counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, - csv_sep, - csv_output ? 0 : -24, - perf_evsel__name(counter)); + csv_sep); + + fprintf(output, "%-*s%s", + csv_output ? 0 : unit_width, + counter->unit, csv_sep); + + fprintf(output, "%*s", + csv_output ? 0 : -25, + perf_evsel__name(counter)); if (counter->cgrp) fprintf(output, "%s%s", @@ -1205,10 +1257,12 @@ static void print_counter(struct perf_evsel *counter, char *prefix) continue; } + uval = val * counter->scale; + if (nsec_counter(counter)) - nsec_printout(cpu, 0, counter, val); + nsec_printout(cpu, 0, counter, uval); else - abs_printout(cpu, 0, counter, val); + abs_printout(cpu, 0, counter, uval); if (!csv_output) { print_noise(counter, 1.0); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 46dd4c2a41ce..dad64926170f 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -162,6 +162,8 @@ void perf_evsel__init(struct perf_evsel *evsel, evsel->idx = idx; evsel->attr = *attr; evsel->leader = evsel; + evsel->unit = ""; + evsel->scale = 1.0; INIT_LIST_HEAD(&evsel->node); hists__init(&evsel->hists); evsel->sample_size = __perf_evsel__sample_size(attr->sample_type); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 1ea7c92e6e33..8120eeb86ac1 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -68,6 +68,8 @@ struct perf_evsel { u32 ids; struct hists hists; char *name; + double scale; + const char *unit; struct event_format *tp_format; union { void *priv; @@ -138,6 +140,7 @@ extern const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX]; int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size); const char *perf_evsel__name(struct perf_evsel *evsel); + const char *perf_evsel__group_name(struct perf_evsel *evsel); int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 6de6f89c2a61..969cb8f0d88d 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -269,9 +269,10 @@ const char *event_type(int type) -static int __add_event(struct list_head *list, int *idx, - struct perf_event_attr *attr, - char *name, struct cpu_map *cpus) +static struct perf_evsel * +__add_event(struct list_head *list, int *idx, + struct perf_event_attr *attr, + char *name, struct cpu_map *cpus) { struct perf_evsel *evsel; @@ -279,19 +280,19 @@ static int __add_event(struct list_head *list, int *idx, evsel = perf_evsel__new_idx(attr, (*idx)++); if (!evsel) - return -ENOMEM; + return NULL; evsel->cpus = cpus; if (name) evsel->name = strdup(name); list_add_tail(&evsel->node, list); - return 0; + return evsel; } static int add_event(struct list_head *list, int *idx, struct perf_event_attr *attr, char *name) { - return __add_event(list, idx, attr, name, NULL); + return __add_event(list, idx, attr, name, NULL) ? 0 : -ENOMEM; } static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size) @@ -633,6 +634,9 @@ int parse_events_add_pmu(struct list_head *list, int *idx, { struct perf_event_attr attr; struct perf_pmu *pmu; + struct perf_evsel *evsel; + char *unit; + double scale; pmu = perf_pmu__find(name); if (!pmu) @@ -640,7 +644,7 @@ int parse_events_add_pmu(struct list_head *list, int *idx, memset(&attr, 0, sizeof(attr)); - if (perf_pmu__check_alias(pmu, head_config)) + if (perf_pmu__check_alias(pmu, head_config, &unit, &scale)) return -EINVAL; /* @@ -652,8 +656,14 @@ int parse_events_add_pmu(struct list_head *list, int *idx, if (perf_pmu__config(pmu, &attr, head_config)) return -EINVAL; - return __add_event(list, idx, &attr, pmu_event_name(head_config), - pmu->cpus); + evsel = __add_event(list, idx, &attr, pmu_event_name(head_config), + pmu->cpus); + if (evsel) { + evsel->unit = unit; + evsel->scale = scale; + } + + return evsel ? 0 : -ENOMEM; } int parse_events__modifier_group(struct list_head *list, diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index c232d8dd410b..56fc10a5e288 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1,19 +1,23 @@ #include #include -#include #include #include #include #include "fs.h" +#include #include "util.h" #include "pmu.h" #include "parse-events.h" #include "cpumap.h" +#define UNIT_MAX_LEN 31 /* max length for event unit name */ + struct perf_pmu_alias { char *name; struct list_head terms; struct list_head list; + char unit[UNIT_MAX_LEN+1]; + double scale; }; struct perf_pmu_format { @@ -94,7 +98,80 @@ static int pmu_format(const char *name, struct list_head *format) return 0; } -static int perf_pmu__new_alias(struct list_head *list, char *name, FILE *file) +static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char *name) +{ + struct stat st; + ssize_t sret; + char scale[128]; + int fd, ret = -1; + char path[PATH_MAX]; + char *lc; + + snprintf(path, PATH_MAX, "%s/%s.scale", dir, name); + + fd = open(path, O_RDONLY); + if (fd == -1) + return -1; + + if (fstat(fd, &st) < 0) + goto error; + + sret = read(fd, scale, sizeof(scale)-1); + if (sret < 0) + goto error; + + scale[sret] = '\0'; + /* + * save current locale + */ + lc = setlocale(LC_NUMERIC, NULL); + + /* + * force to C locale to ensure kernel + * scale string is converted correctly. + * kernel uses default C locale. + */ + setlocale(LC_NUMERIC, "C"); + + alias->scale = strtod(scale, NULL); + + /* restore locale */ + setlocale(LC_NUMERIC, lc); + + ret = 0; +error: + close(fd); + return ret; +} + +static int perf_pmu__parse_unit(struct perf_pmu_alias *alias, char *dir, char *name) +{ + char path[PATH_MAX]; + ssize_t sret; + int fd; + + snprintf(path, PATH_MAX, "%s/%s.unit", dir, name); + + fd = open(path, O_RDONLY); + if (fd == -1) + return -1; + + sret = read(fd, alias->unit, UNIT_MAX_LEN); + if (sret < 0) + goto error; + + close(fd); + + alias->unit[sret] = '\0'; + + return 0; +error: + close(fd); + alias->unit[0] = '\0'; + return -1; +} + +static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FILE *file) { struct perf_pmu_alias *alias; char buf[256]; @@ -110,6 +187,9 @@ static int perf_pmu__new_alias(struct list_head *list, char *name, FILE *file) return -ENOMEM; INIT_LIST_HEAD(&alias->terms); + alias->scale = 1.0; + alias->unit[0] = '\0'; + ret = parse_events_terms(&alias->terms, buf); if (ret) { free(alias); @@ -117,7 +197,14 @@ static int perf_pmu__new_alias(struct list_head *list, char *name, FILE *file) } alias->name = strdup(name); + /* + * load unit name and scale if available + */ + perf_pmu__parse_unit(alias, dir, name); + perf_pmu__parse_scale(alias, dir, name); + list_add_tail(&alias->list, list); + return 0; } @@ -129,6 +216,7 @@ static int pmu_aliases_parse(char *dir, struct list_head *head) { struct dirent *evt_ent; DIR *event_dir; + size_t len; int ret = 0; event_dir = opendir(dir); @@ -143,13 +231,24 @@ static int pmu_aliases_parse(char *dir, struct list_head *head) if (!strcmp(name, ".") || !strcmp(name, "..")) continue; + /* + * skip .unit and .scale info files + * parsed in perf_pmu__new_alias() + */ + len = strlen(name); + if (len > 5 && !strcmp(name + len - 5, ".unit")) + continue; + if (len > 6 && !strcmp(name + len - 6, ".scale")) + continue; + snprintf(path, PATH_MAX, "%s/%s", dir, name); ret = -EINVAL; file = fopen(path, "r"); if (!file) break; - ret = perf_pmu__new_alias(head, name, file); + + ret = perf_pmu__new_alias(head, dir, name, file); fclose(file); } @@ -508,16 +607,42 @@ static struct perf_pmu_alias *pmu_find_alias(struct perf_pmu *pmu, return NULL; } + +static int check_unit_scale(struct perf_pmu_alias *alias, + char **unit, double *scale) +{ + /* + * Only one term in event definition can + * define unit and scale, fail if there's + * more than one. + */ + if ((*unit && alias->unit) || + (*scale && alias->scale)) + return -EINVAL; + + if (alias->unit) + *unit = alias->unit; + + if (alias->scale) + *scale = alias->scale; + + return 0; +} + /* * Find alias in the terms list and replace it with the terms * defined for the alias */ -int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms) +int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, + char **unit, double *scale) { struct parse_events_term *term, *h; struct perf_pmu_alias *alias; int ret; + *unit = NULL; + *scale = 0; + list_for_each_entry_safe(term, h, head_terms, list) { alias = pmu_find_alias(pmu, term); if (!alias) @@ -525,6 +650,11 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms) ret = pmu_alias_terms(alias, &term->list); if (ret) return ret; + + ret = check_unit_scale(alias, unit, scale); + if (ret) + return ret; + list_del(&term->list); free(term); } diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 1179b26f244a..9183380e2038 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -28,7 +28,8 @@ int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr, int perf_pmu__config_terms(struct list_head *formats, struct perf_event_attr *attr, struct list_head *head_terms); -int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms); +int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, + char **unit, double *scale); struct list_head *perf_pmu__alias(struct perf_pmu *pmu, struct list_head *head_terms); int perf_pmu_wrap(void); From 4788e5b4b2338f85fa42a712a182d8afd65d7c58 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 12 Nov 2013 17:58:50 +0100 Subject: [PATCH 079/981] perf/x86: Add Intel RAPL PMU support This patch adds a new uncore PMU to expose the Intel RAPL energy consumption counters. Up to 3 counters, each counting a particular RAPL event are exposed. The RAPL counters are available on Intel SandyBridge, IvyBridge, Haswell. The server skus add a 3rd counter. The following events are available and exposed in sysfs: - power/energy-cores: power consumption of all cores on socket - power/energy-pkg: power consumption of all cores + LLc cache - power/energy-dram: power consumption of DRAM (servers only) For each event both the unit (Joules) and scale (2^-32 J) is exposed in sysfs for use by perf stat and other tools. The files are: /sys/devices/power/events/energy-*.unit /sys/devices/power/events/energy-*.scale The RAPL PMU is uncore by nature and is implemented such that it only works in system-wide mode. Measuring only one CPU per socket is sufficient. The /sys/devices/power/cpumask file can be used by tools to figure out which CPUs to monitor by default. For instance, on a 2-socket system, 2 CPUs (one on each socket) will be shown. All the counters measure in the same unit (exposed via sysfs). The perf_events API exposes all RAPL counters as 64-bit integers counting in unit of 1/2^32 Joules (about 0.23 nJ). User level tools must convert the counts by multiplying them by 2^-32 to obtain Joules. The reason for this is that the kernel avoids doing floating point math whenever possible because it is expensive (user floating-point state must be saved). The method used avoids kernel floating-point usage. There is no loss of precision. Thanks to PeterZ for suggesting this approach. To convert the raw count in Watt: W = C * 2.3 / (1e10 * time) or ldexp(C, -32). RAPL PMU is a new standalone PMU which registers with the perf_event core subsystem. The PMU type (attr->type) is dynamically allocated and is available from /sys/device/power/type. Sampling is not supported by the RAPL PMU. There is no privilege level filtering either. Signed-off-by: Stephane Eranian Reviewed-by: Maria Dimakopoulou Reviewed-by: Andi Kleen Signed-off-by: Peter Zijlstra Cc: acme@redhat.com Cc: jolsa@redhat.com Cc: zheng.z.yan@intel.com Cc: bp@alien8.de Link: http://lkml.kernel.org/r/1384275531-10892-4-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/Makefile | 2 +- arch/x86/kernel/cpu/perf_event_intel_rapl.c | 591 ++++++++++++++++++++ 2 files changed, 592 insertions(+), 1 deletion(-) create mode 100644 arch/x86/kernel/cpu/perf_event_intel_rapl.c diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 47b56a7e99cb..6359506a19ee 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -36,7 +36,7 @@ obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd_iommu.o endif obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o -obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o +obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o perf_event_intel_rapl.o endif diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c new file mode 100644 index 000000000000..cfcd386b5d89 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c @@ -0,0 +1,591 @@ +/* + * perf_event_intel_rapl.c: support Intel RAPL energy consumption counters + * Copyright (C) 2013 Google, Inc., Stephane Eranian + * + * Intel RAPL interface is specified in the IA-32 Manual Vol3b + * section 14.7.1 (September 2013) + * + * RAPL provides more controls than just reporting energy consumption + * however here we only expose the 3 energy consumption free running + * counters (pp0, pkg, dram). + * + * Each of those counters increments in a power unit defined by the + * RAPL_POWER_UNIT MSR. On SandyBridge, this unit is 1/(2^16) Joules + * but it can vary. + * + * Counter to rapl events mappings: + * + * pp0 counter: consumption of all physical cores (power plane 0) + * event: rapl_energy_cores + * perf code: 0x1 + * + * pkg counter: consumption of the whole processor package + * event: rapl_energy_pkg + * perf code: 0x2 + * + * dram counter: consumption of the dram domain (servers only) + * event: rapl_energy_dram + * perf code: 0x3 + * + * We manage those counters as free running (read-only). They may be + * use simultaneously by other tools, such as turbostat. + * + * The events only support system-wide mode counting. There is no + * sampling support because it does not make sense and is not + * supported by the RAPL hardware. + * + * Because we want to avoid floating-point operations in the kernel, + * the events are all reported in fixed point arithmetic (32.32). + * Tools must adjust the counts to convert them to Watts using + * the duration of the measurement. Tools may use a function such as + * ldexp(raw_count, -32); + */ +#include +#include +#include +#include +#include "perf_event.h" + +/* + * RAPL energy status counters + */ +#define RAPL_IDX_PP0_NRG_STAT 0 /* all cores */ +#define INTEL_RAPL_PP0 0x1 /* pseudo-encoding */ +#define RAPL_IDX_PKG_NRG_STAT 1 /* entire package */ +#define INTEL_RAPL_PKG 0x2 /* pseudo-encoding */ +#define RAPL_IDX_RAM_NRG_STAT 2 /* DRAM */ +#define INTEL_RAPL_RAM 0x3 /* pseudo-encoding */ + +/* Clients have PP0, PKG */ +#define RAPL_IDX_CLN (1<config + * any other bit is reserved + */ +#define RAPL_EVENT_MASK 0xFFULL + +#define DEFINE_RAPL_FORMAT_ATTR(_var, _name, _format) \ +static ssize_t __rapl_##_var##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + char *page) \ +{ \ + BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ + return sprintf(page, _format "\n"); \ +} \ +static struct kobj_attribute format_attr_##_var = \ + __ATTR(_name, 0444, __rapl_##_var##_show, NULL) + +#define RAPL_EVENT_DESC(_name, _config) \ +{ \ + .attr = __ATTR(_name, 0444, rapl_event_show, NULL), \ + .config = _config, \ +} + +#define RAPL_CNTR_WIDTH 32 /* 32-bit rapl counters */ + +struct rapl_pmu { + spinlock_t lock; + int hw_unit; /* 1/2^hw_unit Joule */ + int n_active; /* number of active events */ + struct list_head active_list; + struct pmu *pmu; /* pointer to rapl_pmu_class */ +}; + +static struct pmu rapl_pmu_class; +static cpumask_t rapl_cpu_mask; +static int rapl_cntr_mask; + +static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu); +static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free); + +static inline u64 rapl_read_counter(struct perf_event *event) +{ + u64 raw; + rdmsrl(event->hw.event_base, raw); + return raw; +} + +static inline u64 rapl_scale(u64 v) +{ + /* + * scale delta to smallest unit (1/2^32) + * users must then scale back: count * 1/(1e9*2^32) to get Joules + * or use ldexp(count, -32). + * Watts = Joules/Time delta + */ + return v << (32 - __get_cpu_var(rapl_pmu)->hw_unit); +} + +static u64 rapl_event_update(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 prev_raw_count, new_raw_count; + s64 delta, sdelta; + int shift = RAPL_CNTR_WIDTH; + +again: + prev_raw_count = local64_read(&hwc->prev_count); + rdmsrl(event->hw.event_base, new_raw_count); + + if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count) { + cpu_relax(); + goto again; + } + + /* + * Now we have the new raw value and have updated the prev + * timestamp already. We can now calculate the elapsed delta + * (event-)time and add that to the generic event. + * + * Careful, not all hw sign-extends above the physical width + * of the count. + */ + delta = (new_raw_count << shift) - (prev_raw_count << shift); + delta >>= shift; + + sdelta = rapl_scale(delta); + + local64_add(sdelta, &event->count); + + return new_raw_count; +} + +static void __rapl_pmu_event_start(struct rapl_pmu *pmu, + struct perf_event *event) +{ + if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) + return; + + event->hw.state = 0; + + list_add_tail(&event->active_entry, &pmu->active_list); + + local64_set(&event->hw.prev_count, rapl_read_counter(event)); + + pmu->n_active++; +} + +static void rapl_pmu_event_start(struct perf_event *event, int mode) +{ + struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu); + unsigned long flags; + + spin_lock_irqsave(&pmu->lock, flags); + __rapl_pmu_event_start(pmu, event); + spin_unlock_irqrestore(&pmu->lock, flags); +} + +static void rapl_pmu_event_stop(struct perf_event *event, int mode) +{ + struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu); + struct hw_perf_event *hwc = &event->hw; + unsigned long flags; + + spin_lock_irqsave(&pmu->lock, flags); + + /* mark event as deactivated and stopped */ + if (!(hwc->state & PERF_HES_STOPPED)) { + WARN_ON_ONCE(pmu->n_active <= 0); + pmu->n_active--; + + list_del(&event->active_entry); + + WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); + hwc->state |= PERF_HES_STOPPED; + } + + /* check if update of sw counter is necessary */ + if ((mode & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + /* + * Drain the remaining delta count out of a event + * that we are disabling: + */ + rapl_event_update(event); + hwc->state |= PERF_HES_UPTODATE; + } + + spin_unlock_irqrestore(&pmu->lock, flags); +} + +static int rapl_pmu_event_add(struct perf_event *event, int mode) +{ + struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu); + struct hw_perf_event *hwc = &event->hw; + unsigned long flags; + + spin_lock_irqsave(&pmu->lock, flags); + + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + + if (mode & PERF_EF_START) + __rapl_pmu_event_start(pmu, event); + + spin_unlock_irqrestore(&pmu->lock, flags); + + return 0; +} + +static void rapl_pmu_event_del(struct perf_event *event, int flags) +{ + rapl_pmu_event_stop(event, PERF_EF_UPDATE); +} + +static int rapl_pmu_event_init(struct perf_event *event) +{ + u64 cfg = event->attr.config & RAPL_EVENT_MASK; + int bit, msr, ret = 0; + + /* only look at RAPL events */ + if (event->attr.type != rapl_pmu_class.type) + return -ENOENT; + + /* check only supported bits are set */ + if (event->attr.config & ~RAPL_EVENT_MASK) + return -EINVAL; + + /* + * check event is known (determines counter) + */ + switch (cfg) { + case INTEL_RAPL_PP0: + bit = RAPL_IDX_PP0_NRG_STAT; + msr = MSR_PP0_ENERGY_STATUS; + break; + case INTEL_RAPL_PKG: + bit = RAPL_IDX_PKG_NRG_STAT; + msr = MSR_PKG_ENERGY_STATUS; + break; + case INTEL_RAPL_RAM: + bit = RAPL_IDX_RAM_NRG_STAT; + msr = MSR_DRAM_ENERGY_STATUS; + break; + default: + return -EINVAL; + } + /* check event supported */ + if (!(rapl_cntr_mask & (1 << bit))) + return -EINVAL; + + /* unsupported modes and filters */ + if (event->attr.exclude_user || + event->attr.exclude_kernel || + event->attr.exclude_hv || + event->attr.exclude_idle || + event->attr.exclude_host || + event->attr.exclude_guest || + event->attr.sample_period) /* no sampling */ + return -EINVAL; + + /* must be done before validate_group */ + event->hw.event_base = msr; + event->hw.config = cfg; + event->hw.idx = bit; + + return ret; +} + +static void rapl_pmu_event_read(struct perf_event *event) +{ + rapl_event_update(event); +} + +static ssize_t rapl_get_attr_cpumask(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &rapl_cpu_mask); + + buf[n++] = '\n'; + buf[n] = '\0'; + return n; +} + +static DEVICE_ATTR(cpumask, S_IRUGO, rapl_get_attr_cpumask, NULL); + +static struct attribute *rapl_pmu_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static struct attribute_group rapl_pmu_attr_group = { + .attrs = rapl_pmu_attrs, +}; + +EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01"); +EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02"); +EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03"); + +EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules"); +EVENT_ATTR_STR(energy-pkg.unit , rapl_pkg_unit, "Joules"); +EVENT_ATTR_STR(energy-ram.unit , rapl_ram_unit, "Joules"); + +/* + * we compute in 0.23 nJ increments regardless of MSR + */ +EVENT_ATTR_STR(energy-cores.scale, rapl_cores_scale, "2.3283064365386962890625e-10"); +EVENT_ATTR_STR(energy-pkg.scale, rapl_pkg_scale, "2.3283064365386962890625e-10"); +EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890625e-10"); + +static struct attribute *rapl_events_srv_attr[] = { + EVENT_PTR(rapl_cores), + EVENT_PTR(rapl_pkg), + EVENT_PTR(rapl_ram), + + EVENT_PTR(rapl_cores_unit), + EVENT_PTR(rapl_pkg_unit), + EVENT_PTR(rapl_ram_unit), + + EVENT_PTR(rapl_cores_scale), + EVENT_PTR(rapl_pkg_scale), + EVENT_PTR(rapl_ram_scale), + NULL, +}; + +static struct attribute *rapl_events_cln_attr[] = { + EVENT_PTR(rapl_cores), + EVENT_PTR(rapl_pkg), + + EVENT_PTR(rapl_cores_unit), + EVENT_PTR(rapl_pkg_unit), + + EVENT_PTR(rapl_cores_scale), + EVENT_PTR(rapl_pkg_scale), + NULL, +}; + +static struct attribute_group rapl_pmu_events_group = { + .name = "events", + .attrs = NULL, /* patched at runtime */ +}; + +DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7"); +static struct attribute *rapl_formats_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group rapl_pmu_format_group = { + .name = "format", + .attrs = rapl_formats_attr, +}; + +const struct attribute_group *rapl_attr_groups[] = { + &rapl_pmu_attr_group, + &rapl_pmu_format_group, + &rapl_pmu_events_group, + NULL, +}; + +static struct pmu rapl_pmu_class = { + .attr_groups = rapl_attr_groups, + .task_ctx_nr = perf_invalid_context, /* system-wide only */ + .event_init = rapl_pmu_event_init, + .add = rapl_pmu_event_add, /* must have */ + .del = rapl_pmu_event_del, /* must have */ + .start = rapl_pmu_event_start, + .stop = rapl_pmu_event_stop, + .read = rapl_pmu_event_read, +}; + +static void rapl_cpu_exit(int cpu) +{ + struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu); + int i, phys_id = topology_physical_package_id(cpu); + int target = -1; + + /* find a new cpu on same package */ + for_each_online_cpu(i) { + if (i == cpu) + continue; + if (phys_id == topology_physical_package_id(i)) { + target = i; + break; + } + } + /* + * clear cpu from cpumask + * if was set in cpumask and still some cpu on package, + * then move to new cpu + */ + if (cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask) && target >= 0) + cpumask_set_cpu(target, &rapl_cpu_mask); + + WARN_ON(cpumask_empty(&rapl_cpu_mask)); + /* + * migrate events and context to new cpu + */ + if (target >= 0) + perf_pmu_migrate_context(pmu->pmu, cpu, target); +} + +static void rapl_cpu_init(int cpu) +{ + int i, phys_id = topology_physical_package_id(cpu); + + /* check if phys_is is already covered */ + for_each_cpu(i, &rapl_cpu_mask) { + if (phys_id == topology_physical_package_id(i)) + return; + } + /* was not found, so add it */ + cpumask_set_cpu(cpu, &rapl_cpu_mask); +} + +static int rapl_cpu_prepare(int cpu) +{ + struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu); + int phys_id = topology_physical_package_id(cpu); + + if (pmu) + return 0; + + if (phys_id < 0) + return -1; + + pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu)); + if (!pmu) + return -1; + + spin_lock_init(&pmu->lock); + + INIT_LIST_HEAD(&pmu->active_list); + + /* + * grab power unit as: 1/2^unit Joules + * + * we cache in local PMU instance + */ + rdmsrl(MSR_RAPL_POWER_UNIT, pmu->hw_unit); + pmu->hw_unit = (pmu->hw_unit >> 8) & 0x1FULL; + pmu->pmu = &rapl_pmu_class; + + /* set RAPL pmu for this cpu for now */ + per_cpu(rapl_pmu, cpu) = pmu; + per_cpu(rapl_pmu_to_free, cpu) = NULL; + + return 0; +} + +static void rapl_cpu_kfree(int cpu) +{ + struct rapl_pmu *pmu = per_cpu(rapl_pmu_to_free, cpu); + + kfree(pmu); + + per_cpu(rapl_pmu_to_free, cpu) = NULL; +} + +static int rapl_cpu_dying(int cpu) +{ + struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu); + + if (!pmu) + return 0; + + per_cpu(rapl_pmu, cpu) = NULL; + + per_cpu(rapl_pmu_to_free, cpu) = pmu; + + return 0; +} + +static int rapl_cpu_notifier(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (long)hcpu; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_UP_PREPARE: + rapl_cpu_prepare(cpu); + break; + case CPU_STARTING: + rapl_cpu_init(cpu); + break; + case CPU_UP_CANCELED: + case CPU_DYING: + rapl_cpu_dying(cpu); + break; + case CPU_ONLINE: + case CPU_DEAD: + rapl_cpu_kfree(cpu); + break; + case CPU_DOWN_PREPARE: + rapl_cpu_exit(cpu); + break; + default: + break; + } + + return NOTIFY_OK; +} + +static const struct x86_cpu_id rapl_cpu_match[] = { + [0] = { .vendor = X86_VENDOR_INTEL, .family = 6 }, + [1] = {}, +}; + +static int __init rapl_pmu_init(void) +{ + struct rapl_pmu *pmu; + int cpu, ret; + + /* + * check for Intel processor family 6 + */ + if (!x86_match_cpu(rapl_cpu_match)) + return 0; + + /* check supported CPU */ + switch (boot_cpu_data.x86_model) { + case 42: /* Sandy Bridge */ + case 58: /* Ivy Bridge */ + case 60: /* Haswell */ + rapl_cntr_mask = RAPL_IDX_CLN; + rapl_pmu_events_group.attrs = rapl_events_cln_attr; + break; + case 45: /* Sandy Bridge-EP */ + case 62: /* IvyTown */ + rapl_cntr_mask = RAPL_IDX_SRV; + rapl_pmu_events_group.attrs = rapl_events_srv_attr; + break; + + default: + /* unsupported */ + return 0; + } + get_online_cpus(); + + for_each_online_cpu(cpu) { + rapl_cpu_prepare(cpu); + rapl_cpu_init(cpu); + } + + perf_cpu_notifier(rapl_cpu_notifier); + + ret = perf_pmu_register(&rapl_pmu_class, "power", -1); + if (WARN_ON(ret)) { + pr_info("RAPL PMU detected, registration failed (%d), RAPL PMU disabled\n", ret); + put_online_cpus(); + return -1; + } + + pmu = __get_cpu_var(rapl_pmu); + + pr_info("RAPL PMU detected, hw unit 2^-%d Joules," + " API unit is 2^-32 Joules," + " %d fixed counters\n", + pmu->hw_unit, + hweight32(rapl_cntr_mask)); + + put_online_cpus(); + + return 0; +} +device_initcall(rapl_pmu_init); From 8dce7a9a6f4ca7163161a80a4603b66c88c5de8e Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 13 Jun 2013 18:41:16 -0400 Subject: [PATCH 080/981] lockdep: Be nice about building from userspace Lockdep is an awesome piece of code which detects locking issues which are relevant both to userspace and kernelspace. We can easily make lockdep work in userspace since there is really no kernel spacific magic going on in the code. All we need is to wrap two functions which are used by lockdep and are very kernel specific. Doing that will allow tools located in tools/ to easily utilize lockdep's code for their own use. Signed-off-by: Sasha Levin Signed-off-by: Peter Zijlstra Cc: penberg@kernel.org Cc: torvalds@linux-foundation.org Link: http://lkml.kernel.org/r/1352753446-24109-1-git-send-email-sasha.levin@oracle.com Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 576ba756a32d..eb8a54783fa0 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -590,6 +590,7 @@ static int very_verbose(struct lock_class *class) /* * Is this the address of a static object: */ +#ifdef __KERNEL__ static int static_obj(void *obj) { unsigned long start = (unsigned long) &_stext, @@ -616,6 +617,7 @@ static int static_obj(void *obj) */ return is_module_address(addr) || is_module_percpu_address(addr); } +#endif /* * To make lock name printouts unique, we calculate a unique @@ -4115,6 +4117,7 @@ void debug_check_no_locks_held(void) } EXPORT_SYMBOL_GPL(debug_check_no_locks_held); +#ifdef __KERNEL__ void debug_show_all_locks(void) { struct task_struct *g, *p; @@ -4172,6 +4175,7 @@ retry: read_unlock(&tasklist_lock); } EXPORT_SYMBOL_GPL(debug_show_all_locks); +#endif /* * Careful: only use this function if you are sure that From 5634bd7d2ab14fbf736b62b0788fb68e2cb0fde2 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 13 Jun 2013 18:41:17 -0400 Subject: [PATCH 081/981] liblockdep: Wrap kernel/locking/lockdep.c to allow usage from userspace kernel/locking/lockdep.c deals with validating locking scenarios for various architectures supported by the kernel. There isn't anything kernel specific going on in lockdep, and when we compare userspace to other architectures that don't have to deal with irqs such as s390, they become all too similar. We wrap kernel/locking/lockdep.c and include/linux/lockdep.h with several headers which allow us to build and use lockdep from userspace. We don't touch the kernel code itself which means that any work done on lockdep in the kernel will automatically benefit userspace lockdep as well! Signed-off-by: Sasha Levin Signed-off-by: Peter Zijlstra Cc: torvalds@linux-foundation.org Link: http://lkml.kernel.org/r/1371163284-6346-3-git-send-email-sasha.levin@oracle.com Signed-off-by: Ingo Molnar --- tools/lib/lockdep/Makefile | 251 ++++++++++++++++++ tools/lib/lockdep/common.c | 33 +++ tools/lib/lockdep/lockdep.c | 2 + tools/lib/lockdep/lockdep_internals.h | 1 + tools/lib/lockdep/lockdep_states.h | 1 + tools/lib/lockdep/rbtree.c | 1 + tools/lib/lockdep/uinclude/asm/hweight.h | 3 + tools/lib/lockdep/uinclude/asm/sections.h | 3 + tools/lib/lockdep/uinclude/linux/bitops.h | 3 + tools/lib/lockdep/uinclude/linux/compiler.h | 7 + .../lib/lockdep/uinclude/linux/debug_locks.h | 12 + tools/lib/lockdep/uinclude/linux/delay.h | 3 + tools/lib/lockdep/uinclude/linux/export.h | 7 + tools/lib/lockdep/uinclude/linux/ftrace.h | 3 + tools/lib/lockdep/uinclude/linux/gfp.h | 3 + tools/lib/lockdep/uinclude/linux/hardirq.h | 11 + tools/lib/lockdep/uinclude/linux/hash.h | 1 + tools/lib/lockdep/uinclude/linux/interrupt.h | 3 + tools/lib/lockdep/uinclude/linux/irqflags.h | 38 +++ tools/lib/lockdep/uinclude/linux/kallsyms.h | 32 +++ .../lib/lockdep/uinclude/linux/kern_levels.h | 25 ++ tools/lib/lockdep/uinclude/linux/kernel.h | 44 +++ tools/lib/lockdep/uinclude/linux/kmemcheck.h | 8 + tools/lib/lockdep/uinclude/linux/linkage.h | 3 + tools/lib/lockdep/uinclude/linux/list.h | 1 + tools/lib/lockdep/uinclude/linux/lockdep.h | 55 ++++ tools/lib/lockdep/uinclude/linux/module.h | 6 + tools/lib/lockdep/uinclude/linux/mutex.h | 3 + tools/lib/lockdep/uinclude/linux/poison.h | 1 + tools/lib/lockdep/uinclude/linux/prefetch.h | 6 + tools/lib/lockdep/uinclude/linux/proc_fs.h | 3 + tools/lib/lockdep/uinclude/linux/rbtree.h | 1 + .../lockdep/uinclude/linux/rbtree_augmented.h | 2 + tools/lib/lockdep/uinclude/linux/rcu.h | 16 ++ tools/lib/lockdep/uinclude/linux/seq_file.h | 3 + tools/lib/lockdep/uinclude/linux/spinlock.h | 25 ++ tools/lib/lockdep/uinclude/linux/stacktrace.h | 32 +++ tools/lib/lockdep/uinclude/linux/stringify.h | 7 + tools/lib/lockdep/uinclude/linux/types.h | 58 ++++ .../lib/lockdep/uinclude/trace/events/lock.h | 3 + 40 files changed, 720 insertions(+) create mode 100644 tools/lib/lockdep/Makefile create mode 100644 tools/lib/lockdep/common.c create mode 100644 tools/lib/lockdep/lockdep.c create mode 100644 tools/lib/lockdep/lockdep_internals.h create mode 100644 tools/lib/lockdep/lockdep_states.h create mode 100644 tools/lib/lockdep/rbtree.c create mode 100644 tools/lib/lockdep/uinclude/asm/hweight.h create mode 100644 tools/lib/lockdep/uinclude/asm/sections.h create mode 100644 tools/lib/lockdep/uinclude/linux/bitops.h create mode 100644 tools/lib/lockdep/uinclude/linux/compiler.h create mode 100644 tools/lib/lockdep/uinclude/linux/debug_locks.h create mode 100644 tools/lib/lockdep/uinclude/linux/delay.h create mode 100644 tools/lib/lockdep/uinclude/linux/export.h create mode 100644 tools/lib/lockdep/uinclude/linux/ftrace.h create mode 100644 tools/lib/lockdep/uinclude/linux/gfp.h create mode 100644 tools/lib/lockdep/uinclude/linux/hardirq.h create mode 100644 tools/lib/lockdep/uinclude/linux/hash.h create mode 100644 tools/lib/lockdep/uinclude/linux/interrupt.h create mode 100644 tools/lib/lockdep/uinclude/linux/irqflags.h create mode 100644 tools/lib/lockdep/uinclude/linux/kallsyms.h create mode 100644 tools/lib/lockdep/uinclude/linux/kern_levels.h create mode 100644 tools/lib/lockdep/uinclude/linux/kernel.h create mode 100644 tools/lib/lockdep/uinclude/linux/kmemcheck.h create mode 100644 tools/lib/lockdep/uinclude/linux/linkage.h create mode 100644 tools/lib/lockdep/uinclude/linux/list.h create mode 100644 tools/lib/lockdep/uinclude/linux/lockdep.h create mode 100644 tools/lib/lockdep/uinclude/linux/module.h create mode 100644 tools/lib/lockdep/uinclude/linux/mutex.h create mode 100644 tools/lib/lockdep/uinclude/linux/poison.h create mode 100644 tools/lib/lockdep/uinclude/linux/prefetch.h create mode 100644 tools/lib/lockdep/uinclude/linux/proc_fs.h create mode 100644 tools/lib/lockdep/uinclude/linux/rbtree.h create mode 100644 tools/lib/lockdep/uinclude/linux/rbtree_augmented.h create mode 100644 tools/lib/lockdep/uinclude/linux/rcu.h create mode 100644 tools/lib/lockdep/uinclude/linux/seq_file.h create mode 100644 tools/lib/lockdep/uinclude/linux/spinlock.h create mode 100644 tools/lib/lockdep/uinclude/linux/stacktrace.h create mode 100644 tools/lib/lockdep/uinclude/linux/stringify.h create mode 100644 tools/lib/lockdep/uinclude/linux/types.h create mode 100644 tools/lib/lockdep/uinclude/trace/events/lock.h diff --git a/tools/lib/lockdep/Makefile b/tools/lib/lockdep/Makefile new file mode 100644 index 000000000000..da8b7aa3d351 --- /dev/null +++ b/tools/lib/lockdep/Makefile @@ -0,0 +1,251 @@ +# liblockdep version +LL_VERSION = 0 +LL_PATCHLEVEL = 0 +LL_EXTRAVERSION = 1 + +# file format version +FILE_VERSION = 1 + +MAKEFLAGS += --no-print-directory + + +# Makefiles suck: This macro sets a default value of $(2) for the +# variable named by $(1), unless the variable has been set by +# environment or command line. This is necessary for CC and AR +# because make sets default values, so the simpler ?= approach +# won't work as expected. +define allow-override + $(if $(or $(findstring environment,$(origin $(1))),\ + $(findstring command line,$(origin $(1)))),,\ + $(eval $(1) = $(2))) +endef + +# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix. +$(call allow-override,CC,$(CROSS_COMPILE)gcc) +$(call allow-override,AR,$(CROSS_COMPILE)ar) + +INSTALL = install + +# Use DESTDIR for installing into a different root directory. +# This is useful for building a package. The program will be +# installed in this directory as if it was the root directory. +# Then the build tool can move it later. +DESTDIR ?= +DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))' + +prefix ?= /usr/local +libdir_relative = lib +libdir = $(prefix)/$(libdir_relative) +bindir_relative = bin +bindir = $(prefix)/$(bindir_relative) + +export DESTDIR DESTDIR_SQ INSTALL + +# copy a bit from Linux kbuild + +ifeq ("$(origin V)", "command line") + VERBOSE = $(V) +endif +ifndef VERBOSE + VERBOSE = 0 +endif + +ifeq ("$(origin O)", "command line") + BUILD_OUTPUT := $(O) +endif + +ifeq ($(BUILD_SRC),) +ifneq ($(BUILD_OUTPUT),) + +define build_output + $(if $(VERBOSE:1=),@)$(MAKE) -C $(BUILD_OUTPUT) \ + BUILD_SRC=$(CURDIR) -f $(CURDIR)/Makefile $1 +endef + +saved-output := $(BUILD_OUTPUT) +BUILD_OUTPUT := $(shell cd $(BUILD_OUTPUT) && /bin/pwd) +$(if $(BUILD_OUTPUT),, \ + $(error output directory "$(saved-output)" does not exist)) + +all: sub-make + +gui: force + $(call build_output, all_cmd) + +$(filter-out gui,$(MAKECMDGOALS)): sub-make + +sub-make: force + $(call build_output, $(MAKECMDGOALS)) + + +# Leave processing to above invocation of make +skip-makefile := 1 + +endif # BUILD_OUTPUT +endif # BUILD_SRC + +# We process the rest of the Makefile if this is the final invocation of make +ifeq ($(skip-makefile),) + +srctree := $(if $(BUILD_SRC),$(BUILD_SRC),$(CURDIR)) +objtree := $(CURDIR) +src := $(srctree) +obj := $(objtree) + +export prefix libdir bindir src obj + +# Shell quotes +libdir_SQ = $(subst ','\'',$(libdir)) +bindir_SQ = $(subst ','\'',$(bindir)) + +LIB_FILE = liblockdep.a liblockdep.so +BIN_FILE = lockdep + +CONFIG_INCLUDES = +CONFIG_LIBS = +CONFIG_FLAGS = + +OBJ = $@ +N = + +export Q VERBOSE + +LIBLOCKDEP_VERSION = $(LL_VERSION).$(LL_PATCHLEVEL).$(LL_EXTRAVERSION) + +INCLUDES = -I. -I/usr/local/include -I./uinclude $(CONFIG_INCLUDES) + +# Set compile option CFLAGS if not set elsewhere +CFLAGS ?= -g -DCONFIG_LOCKDEP -DCONFIG_STACKTRACE -DCONFIG_PROVE_LOCKING -DBITS_PER_LONG=__WORDSIZE -DLIBLOCKDEP_VERSION='"$(LIBLOCKDEP_VERSION)"' -rdynamic -O0 -g + +override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ) + +ifeq ($(VERBOSE),1) + Q = + print_compile = + print_app_build = + print_fpic_compile = + print_shared_lib_compile = + print_install = +else + Q = @ + print_compile = echo ' CC '$(OBJ); + print_app_build = echo ' BUILD '$(OBJ); + print_fpic_compile = echo ' CC FPIC '$(OBJ); + print_shared_lib_compile = echo ' BUILD SHARED LIB '$(OBJ); + print_static_lib_build = echo ' BUILD STATIC LIB '$(OBJ); + print_install = echo ' INSTALL '$1' to $(DESTDIR_SQ)$2'; +endif + +do_fpic_compile = \ + ($(print_fpic_compile) \ + $(CC) -c $(CFLAGS) $(EXT) -fPIC $< -o $@) + +do_app_build = \ + ($(print_app_build) \ + $(CC) $^ -rdynamic -o $@ $(CONFIG_LIBS) $(LIBS)) + +do_compile_shared_library = \ + ($(print_shared_lib_compile) \ + $(CC) --shared $^ -o $@ -lpthread -ldl) + +do_build_static_lib = \ + ($(print_static_lib_build) \ + $(RM) $@; $(AR) rcs $@ $^) + + +define do_compile + $(print_compile) \ + $(CC) -c $(CFLAGS) $(EXT) $< -o $(obj)/$@; +endef + +$(obj)/%.o: $(src)/%.c + $(Q)$(call do_compile) + +%.o: $(src)/%.c + $(Q)$(call do_compile) + +PEVENT_LIB_OBJS = common.o lockdep.o preload.o rbtree.o + +ALL_OBJS = $(PEVENT_LIB_OBJS) + +CMD_TARGETS = $(LIB_FILE) + +TARGETS = $(CMD_TARGETS) + + +all: all_cmd + +all_cmd: $(CMD_TARGETS) + +liblockdep.so: $(PEVENT_LIB_OBJS) + $(Q)$(do_compile_shared_library) + +liblockdep.a: $(PEVENT_LIB_OBJS) + $(Q)$(do_build_static_lib) + +$(PEVENT_LIB_OBJS): %.o: $(src)/%.c + $(Q)$(do_fpic_compile) + +## make deps + +all_objs := $(sort $(ALL_OBJS)) +all_deps := $(all_objs:%.o=.%.d) + +# let .d file also depends on the source and header files +define check_deps + @set -e; $(RM) $@; \ + $(CC) -MM $(CFLAGS) $< > $@.$$$$; \ + sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \ + $(RM) $@.$$$$ +endef + +$(all_deps): .%.d: $(src)/%.c + $(Q)$(call check_deps) + +$(all_objs) : %.o : .%.d + +dep_includes := $(wildcard $(all_deps)) + +ifneq ($(dep_includes),) + include $(dep_includes) +endif + +### Detect environment changes +TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):$(ARCH):$(CROSS_COMPILE) + +tags: force + $(RM) tags + find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \ + --regex-c++='/_PE\(([^,)]*).*/PEVENT_ERRNO__\1/' + +TAGS: force + $(RM) TAGS + find . -name '*.[ch]' | xargs etags \ + --regex='/_PE(\([^,)]*\).*/PEVENT_ERRNO__\1/' + +define do_install + $(print_install) \ + if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ + fi; \ + $(INSTALL) $1 '$(DESTDIR_SQ)$2' +endef + +install_lib: all_cmd + $(Q)$(call do_install,$(LIB_FILE),$(libdir_SQ)) + $(Q)$(call do_install,$(BIN_FILE),$(bindir_SQ)) + +install: install_lib + +clean: + $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d + $(RM) tags TAGS + +endif # skip-makefile + +PHONY += force +force: + +# Declare the contents of the .PHONY variable as phony. We keep that +# information in a variable so we can use it in if_changed and friends. +.PHONY: $(PHONY) diff --git a/tools/lib/lockdep/common.c b/tools/lib/lockdep/common.c new file mode 100644 index 000000000000..8ef602f18a32 --- /dev/null +++ b/tools/lib/lockdep/common.c @@ -0,0 +1,33 @@ +#include +#include +#include +#include +#include +#include + +static __thread struct task_struct current_obj; + +/* lockdep wants these */ +bool debug_locks = true; +bool debug_locks_silent; + +__attribute__((constructor)) static void liblockdep_init(void) +{ + lockdep_init(); +} + +__attribute__((destructor)) static void liblockdep_exit(void) +{ + debug_check_no_locks_held(¤t_obj); +} + +struct task_struct *__curr(void) +{ + if (current_obj.pid == 0) { + /* Makes lockdep output pretty */ + prctl(PR_GET_NAME, current_obj.comm); + current_obj.pid = syscall(__NR_gettid); + } + + return ¤t_obj; +} diff --git a/tools/lib/lockdep/lockdep.c b/tools/lib/lockdep/lockdep.c new file mode 100644 index 000000000000..f42b7e9aa48f --- /dev/null +++ b/tools/lib/lockdep/lockdep.c @@ -0,0 +1,2 @@ +#include +#include "../../../kernel/locking/lockdep.c" diff --git a/tools/lib/lockdep/lockdep_internals.h b/tools/lib/lockdep/lockdep_internals.h new file mode 100644 index 000000000000..29d0c954cc24 --- /dev/null +++ b/tools/lib/lockdep/lockdep_internals.h @@ -0,0 +1 @@ +#include "../../../kernel/locking/lockdep_internals.h" diff --git a/tools/lib/lockdep/lockdep_states.h b/tools/lib/lockdep/lockdep_states.h new file mode 100644 index 000000000000..248d235efda9 --- /dev/null +++ b/tools/lib/lockdep/lockdep_states.h @@ -0,0 +1 @@ +#include "../../../kernel/locking/lockdep_states.h" diff --git a/tools/lib/lockdep/rbtree.c b/tools/lib/lockdep/rbtree.c new file mode 100644 index 000000000000..f7f43033c8b7 --- /dev/null +++ b/tools/lib/lockdep/rbtree.c @@ -0,0 +1 @@ +#include "../../../lib/rbtree.c" diff --git a/tools/lib/lockdep/uinclude/asm/hweight.h b/tools/lib/lockdep/uinclude/asm/hweight.h new file mode 100644 index 000000000000..fab00ff936d1 --- /dev/null +++ b/tools/lib/lockdep/uinclude/asm/hweight.h @@ -0,0 +1,3 @@ + +/* empty file */ + diff --git a/tools/lib/lockdep/uinclude/asm/sections.h b/tools/lib/lockdep/uinclude/asm/sections.h new file mode 100644 index 000000000000..fab00ff936d1 --- /dev/null +++ b/tools/lib/lockdep/uinclude/asm/sections.h @@ -0,0 +1,3 @@ + +/* empty file */ + diff --git a/tools/lib/lockdep/uinclude/linux/bitops.h b/tools/lib/lockdep/uinclude/linux/bitops.h new file mode 100644 index 000000000000..fab00ff936d1 --- /dev/null +++ b/tools/lib/lockdep/uinclude/linux/bitops.h @@ -0,0 +1,3 @@ + +/* empty file */ + diff --git a/tools/lib/lockdep/uinclude/linux/compiler.h b/tools/lib/lockdep/uinclude/linux/compiler.h new file mode 100644 index 000000000000..7ac838a1f196 --- /dev/null +++ b/tools/lib/lockdep/uinclude/linux/compiler.h @@ -0,0 +1,7 @@ +#ifndef _LIBLOCKDEP_LINUX_COMPILER_H_ +#define _LIBLOCKDEP_LINUX_COMPILER_H_ + +#define __used __attribute__((__unused__)) +#define unlikely + +#endif diff --git a/tools/lib/lockdep/uinclude/linux/debug_locks.h b/tools/lib/lockdep/uinclude/linux/debug_locks.h new file mode 100644 index 000000000000..f38eb64df794 --- /dev/null +++ b/tools/lib/lockdep/uinclude/linux/debug_locks.h @@ -0,0 +1,12 @@ +#ifndef _LIBLOCKDEP_DEBUG_LOCKS_H_ +#define _LIBLOCKDEP_DEBUG_LOCKS_H_ + +#include +#include + +#define DEBUG_LOCKS_WARN_ON(x) (x) + +extern bool debug_locks; +extern bool debug_locks_silent; + +#endif diff --git a/tools/lib/lockdep/uinclude/linux/delay.h b/tools/lib/lockdep/uinclude/linux/delay.h new file mode 100644 index 000000000000..fab00ff936d1 --- /dev/null +++ b/tools/lib/lockdep/uinclude/linux/delay.h @@ -0,0 +1,3 @@ + +/* empty file */ + diff --git a/tools/lib/lockdep/uinclude/linux/export.h b/tools/lib/lockdep/uinclude/linux/export.h new file mode 100644 index 000000000000..6bdf3492c535 --- /dev/null +++ b/tools/lib/lockdep/uinclude/linux/export.h @@ -0,0 +1,7 @@ +#ifndef _LIBLOCKDEP_LINUX_EXPORT_H_ +#define _LIBLOCKDEP_LINUX_EXPORT_H_ + +#define EXPORT_SYMBOL(sym) +#define EXPORT_SYMBOL_GPL(sym) + +#endif diff --git a/tools/lib/lockdep/uinclude/linux/ftrace.h b/tools/lib/lockdep/uinclude/linux/ftrace.h new file mode 100644 index 000000000000..fab00ff936d1 --- /dev/null +++ b/tools/lib/lockdep/uinclude/linux/ftrace.h @@ -0,0 +1,3 @@ + +/* empty file */ + diff --git a/tools/lib/lockdep/uinclude/linux/gfp.h b/tools/lib/lockdep/uinclude/linux/gfp.h new file mode 100644 index 000000000000..fab00ff936d1 --- /dev/null +++ b/tools/lib/lockdep/uinclude/linux/gfp.h @@ -0,0 +1,3 @@ + +/* empty file */ + diff --git a/tools/lib/lockdep/uinclude/linux/hardirq.h b/tools/lib/lockdep/uinclude/linux/hardirq.h new file mode 100644 index 000000000000..c8f3f8f58729 --- /dev/null +++ b/tools/lib/lockdep/uinclude/linux/hardirq.h @@ -0,0 +1,11 @@ +#ifndef _LIBLOCKDEP_LINUX_HARDIRQ_H_ +#define _LIBLOCKDEP_LINUX_HARDIRQ_H_ + +#define SOFTIRQ_BITS 0UL +#define HARDIRQ_BITS 0UL +#define SOFTIRQ_SHIFT 0UL +#define HARDIRQ_SHIFT 0UL +#define hardirq_count() 0UL +#define softirq_count() 0UL + +#endif diff --git a/tools/lib/lockdep/uinclude/linux/hash.h b/tools/lib/lockdep/uinclude/linux/hash.h new file mode 100644 index 000000000000..0f8479858dc0 --- /dev/null +++ b/tools/lib/lockdep/uinclude/linux/hash.h @@ -0,0 +1 @@ +#include "../../../include/linux/hash.h" diff --git a/tools/lib/lockdep/uinclude/linux/interrupt.h b/tools/lib/lockdep/uinclude/linux/interrupt.h new file mode 100644 index 000000000000..fab00ff936d1 --- /dev/null +++ b/tools/lib/lockdep/uinclude/linux/interrupt.h @@ -0,0 +1,3 @@ + +/* empty file */ + diff --git a/tools/lib/lockdep/uinclude/linux/irqflags.h b/tools/lib/lockdep/uinclude/linux/irqflags.h new file mode 100644 index 000000000000..6cc296f0fad0 --- /dev/null +++ b/tools/lib/lockdep/uinclude/linux/irqflags.h @@ -0,0 +1,38 @@ +#ifndef _LIBLOCKDEP_LINUX_TRACE_IRQFLAGS_H_ +#define _LIBLOCKDEP_LINUX_TRACE_IRQFLAGS_H_ + +# define trace_hardirq_context(p) 0 +# define trace_softirq_context(p) 0 +# define trace_hardirqs_enabled(p) 0 +# define trace_softirqs_enabled(p) 0 +# define trace_hardirq_enter() do { } while (0) +# define trace_hardirq_exit() do { } while (0) +# define lockdep_softirq_enter() do { } while (0) +# define lockdep_softirq_exit() do { } while (0) +# define INIT_TRACE_IRQFLAGS + +# define stop_critical_timings() do { } while (0) +# define start_critical_timings() do { } while (0) + +#define raw_local_irq_disable() do { } while (0) +#define raw_local_irq_enable() do { } while (0) +#define raw_local_irq_save(flags) ((flags) = 0) +#define raw_local_irq_restore(flags) do { } while (0) +#define raw_local_save_flags(flags) ((flags) = 0) +#define raw_irqs_disabled_flags(flags) do { } while (0) +#define raw_irqs_disabled() 0 +#define raw_safe_halt() + +#define local_irq_enable() do { } while (0) +#define local_irq_disable() do { } while (0) +#define local_irq_save(flags) ((flags) = 0) +#define local_irq_restore(flags) do { } while (0) +#define local_save_flags(flags) ((flags) = 0) +#define irqs_disabled() (1) +#define irqs_disabled_flags(flags) (0) +#define safe_halt() do { } while (0) + +#define trace_lock_release(x, y) +#define trace_lock_acquire(a, b, c, d, e, f, g) + +#endif diff --git a/tools/lib/lockdep/uinclude/linux/kallsyms.h b/tools/lib/lockdep/uinclude/linux/kallsyms.h new file mode 100644 index 000000000000..b0f2dbdf1a15 --- /dev/null +++ b/tools/lib/lockdep/uinclude/linux/kallsyms.h @@ -0,0 +1,32 @@ +#ifndef _LIBLOCKDEP_LINUX_KALLSYMS_H_ +#define _LIBLOCKDEP_LINUX_KALLSYMS_H_ + +#include +#include + +#define KSYM_NAME_LEN 128 + +struct module; + +static inline const char *kallsyms_lookup(unsigned long addr, + unsigned long *symbolsize, + unsigned long *offset, + char **modname, char *namebuf) +{ + return NULL; +} + +#include +#include +static inline void print_ip_sym(unsigned long ip) +{ + char **name; + + name = backtrace_symbols((void **)&ip, 1); + + printf("%s\n", *name); + + free(name); +} + +#endif diff --git a/tools/lib/lockdep/uinclude/linux/kern_levels.h b/tools/lib/lockdep/uinclude/linux/kern_levels.h new file mode 100644 index 000000000000..3b9bade28698 --- /dev/null +++ b/tools/lib/lockdep/uinclude/linux/kern_levels.h @@ -0,0 +1,25 @@ +#ifndef __KERN_LEVELS_H__ +#define __KERN_LEVELS_H__ + +#define KERN_SOH "" /* ASCII Start Of Header */ +#define KERN_SOH_ASCII '' + +#define KERN_EMERG KERN_SOH "" /* system is unusable */ +#define KERN_ALERT KERN_SOH "" /* action must be taken immediately */ +#define KERN_CRIT KERN_SOH "" /* critical conditions */ +#define KERN_ERR KERN_SOH "" /* error conditions */ +#define KERN_WARNING KERN_SOH "" /* warning conditions */ +#define KERN_NOTICE KERN_SOH "" /* normal but significant condition */ +#define KERN_INFO KERN_SOH "" /* informational */ +#define KERN_DEBUG KERN_SOH "" /* debug-level messages */ + +#define KERN_DEFAULT KERN_SOH "" /* the default kernel loglevel */ + +/* + * Annotation for a "continued" line of log printout (only done after a + * line that had no enclosing \n). Only to be used by core/arch code + * during early bootup (a continued line is not SMP-safe otherwise). + */ +#define KERN_CONT "" + +#endif diff --git a/tools/lib/lockdep/uinclude/linux/kernel.h b/tools/lib/lockdep/uinclude/linux/kernel.h new file mode 100644 index 000000000000..a11e3c357be7 --- /dev/null +++ b/tools/lib/lockdep/uinclude/linux/kernel.h @@ -0,0 +1,44 @@ +#ifndef _LIBLOCKDEP_LINUX_KERNEL_H_ +#define _LIBLOCKDEP_LINUX_KERNEL_H_ + +#include +#include +#include +#include +#include + +#ifndef container_of +#define container_of(ptr, type, member) ({ \ + const typeof(((type *)0)->member) * __mptr = (ptr); \ + (type *)((char *)__mptr - offsetof(type, member)); }) +#endif + +#define max(x, y) ({ \ + typeof(x) _max1 = (x); \ + typeof(y) _max2 = (y); \ + (void) (&_max1 == &_max2); \ + _max1 > _max2 ? _max1 : _max2; }) + +#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) +#define WARN_ON(x) (x) +#define WARN_ON_ONCE(x) (x) +#define likely(x) (x) +#define WARN(x, y, z) (x) +#define uninitialized_var(x) x +#define __init +#define noinline +#define list_add_tail_rcu list_add_tail + +#ifndef CALLER_ADDR0 +#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) +#endif + +#ifndef _RET_IP_ +#define _RET_IP_ CALLER_ADDR0 +#endif + +#ifndef _THIS_IP_ +#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; }) +#endif + +#endif diff --git a/tools/lib/lockdep/uinclude/linux/kmemcheck.h b/tools/lib/lockdep/uinclude/linux/kmemcheck.h new file mode 100644 index 000000000000..94d598bc6abe --- /dev/null +++ b/tools/lib/lockdep/uinclude/linux/kmemcheck.h @@ -0,0 +1,8 @@ +#ifndef _LIBLOCKDEP_LINUX_KMEMCHECK_H_ +#define _LIBLOCKDEP_LINUX_KMEMCHECK_H_ + +static inline void kmemcheck_mark_initialized(void *address, unsigned int n) +{ +} + +#endif diff --git a/tools/lib/lockdep/uinclude/linux/linkage.h b/tools/lib/lockdep/uinclude/linux/linkage.h new file mode 100644 index 000000000000..fab00ff936d1 --- /dev/null +++ b/tools/lib/lockdep/uinclude/linux/linkage.h @@ -0,0 +1,3 @@ + +/* empty file */ + diff --git a/tools/lib/lockdep/uinclude/linux/list.h b/tools/lib/lockdep/uinclude/linux/list.h new file mode 100644 index 000000000000..6e9ef31ed82e --- /dev/null +++ b/tools/lib/lockdep/uinclude/linux/list.h @@ -0,0 +1 @@ +#include "../../../include/linux/list.h" diff --git a/tools/lib/lockdep/uinclude/linux/lockdep.h b/tools/lib/lockdep/uinclude/linux/lockdep.h new file mode 100644 index 000000000000..d0f5d6e50214 --- /dev/null +++ b/tools/lib/lockdep/uinclude/linux/lockdep.h @@ -0,0 +1,55 @@ +#ifndef _LIBLOCKDEP_LOCKDEP_H_ +#define _LIBLOCKDEP_LOCKDEP_H_ + +#include +#include +#include +#include +#include + + +#define MAX_LOCK_DEPTH 2000UL + +#include "../../../include/linux/lockdep.h" + +struct task_struct { + u64 curr_chain_key; + int lockdep_depth; + unsigned int lockdep_recursion; + struct held_lock held_locks[MAX_LOCK_DEPTH]; + gfp_t lockdep_reclaim_gfp; + int pid; + char comm[17]; +}; + +extern struct task_struct *__curr(void); + +#define current (__curr()) + +#define debug_locks_off() 1 +#define task_pid_nr(tsk) ((tsk)->pid) + +#define KSYM_NAME_LEN 128 +#define printk printf + +#define list_del_rcu list_del + +#define atomic_t unsigned long +#define atomic_inc(x) ((*(x))++) + +static struct new_utsname *init_utsname(void) +{ + static struct new_utsname n = (struct new_utsname) { + .release = "liblockdep", + .version = LIBLOCKDEP_VERSION, + }; + + return &n; +} + +#define print_tainted() "" +#define static_obj(x) 1 + +#define debug_show_all_locks() + +#endif diff --git a/tools/lib/lockdep/uinclude/linux/module.h b/tools/lib/lockdep/uinclude/linux/module.h new file mode 100644 index 000000000000..09c7a7be8ccc --- /dev/null +++ b/tools/lib/lockdep/uinclude/linux/module.h @@ -0,0 +1,6 @@ +#ifndef _LIBLOCKDEP_LINUX_MODULE_H_ +#define _LIBLOCKDEP_LINUX_MODULE_H_ + +#define module_param(name, type, perm) + +#endif diff --git a/tools/lib/lockdep/uinclude/linux/mutex.h b/tools/lib/lockdep/uinclude/linux/mutex.h new file mode 100644 index 000000000000..fab00ff936d1 --- /dev/null +++ b/tools/lib/lockdep/uinclude/linux/mutex.h @@ -0,0 +1,3 @@ + +/* empty file */ + diff --git a/tools/lib/lockdep/uinclude/linux/poison.h b/tools/lib/lockdep/uinclude/linux/poison.h new file mode 100644 index 000000000000..0c27bdf14233 --- /dev/null +++ b/tools/lib/lockdep/uinclude/linux/poison.h @@ -0,0 +1 @@ +#include "../../../include/linux/poison.h" diff --git a/tools/lib/lockdep/uinclude/linux/prefetch.h b/tools/lib/lockdep/uinclude/linux/prefetch.h new file mode 100644 index 000000000000..d73fe6f850ac --- /dev/null +++ b/tools/lib/lockdep/uinclude/linux/prefetch.h @@ -0,0 +1,6 @@ +#ifndef _LIBLOCKDEP_LINUX_PREFETCH_H_ +#define _LIBLOCKDEP_LINUX_PREFETCH_H + +static inline void prefetch(void *a __attribute__((unused))) { } + +#endif diff --git a/tools/lib/lockdep/uinclude/linux/proc_fs.h b/tools/lib/lockdep/uinclude/linux/proc_fs.h new file mode 100644 index 000000000000..fab00ff936d1 --- /dev/null +++ b/tools/lib/lockdep/uinclude/linux/proc_fs.h @@ -0,0 +1,3 @@ + +/* empty file */ + diff --git a/tools/lib/lockdep/uinclude/linux/rbtree.h b/tools/lib/lockdep/uinclude/linux/rbtree.h new file mode 100644 index 000000000000..965901db4862 --- /dev/null +++ b/tools/lib/lockdep/uinclude/linux/rbtree.h @@ -0,0 +1 @@ +#include "../../../include/linux/rbtree.h" diff --git a/tools/lib/lockdep/uinclude/linux/rbtree_augmented.h b/tools/lib/lockdep/uinclude/linux/rbtree_augmented.h new file mode 100644 index 000000000000..c3759477379c --- /dev/null +++ b/tools/lib/lockdep/uinclude/linux/rbtree_augmented.h @@ -0,0 +1,2 @@ +#define __always_inline +#include "../../../include/linux/rbtree_augmented.h" diff --git a/tools/lib/lockdep/uinclude/linux/rcu.h b/tools/lib/lockdep/uinclude/linux/rcu.h new file mode 100644 index 000000000000..4c99fcb5da27 --- /dev/null +++ b/tools/lib/lockdep/uinclude/linux/rcu.h @@ -0,0 +1,16 @@ +#ifndef _LIBLOCKDEP_RCU_H_ +#define _LIBLOCKDEP_RCU_H_ + +int rcu_scheduler_active; + +static inline int rcu_lockdep_current_cpu_online(void) +{ + return 1; +} + +static inline int rcu_is_cpu_idle(void) +{ + return 1; +} + +#endif diff --git a/tools/lib/lockdep/uinclude/linux/seq_file.h b/tools/lib/lockdep/uinclude/linux/seq_file.h new file mode 100644 index 000000000000..fab00ff936d1 --- /dev/null +++ b/tools/lib/lockdep/uinclude/linux/seq_file.h @@ -0,0 +1,3 @@ + +/* empty file */ + diff --git a/tools/lib/lockdep/uinclude/linux/spinlock.h b/tools/lib/lockdep/uinclude/linux/spinlock.h new file mode 100644 index 000000000000..68c1aa2bcba5 --- /dev/null +++ b/tools/lib/lockdep/uinclude/linux/spinlock.h @@ -0,0 +1,25 @@ +#ifndef _LIBLOCKDEP_SPINLOCK_H_ +#define _LIBLOCKDEP_SPINLOCK_H_ + +#include +#include + +#define arch_spinlock_t pthread_mutex_t +#define __ARCH_SPIN_LOCK_UNLOCKED PTHREAD_MUTEX_INITIALIZER + +static inline void arch_spin_lock(arch_spinlock_t *mutex) +{ + pthread_mutex_lock(mutex); +} + +static inline void arch_spin_unlock(arch_spinlock_t *mutex) +{ + pthread_mutex_unlock(mutex); +} + +static inline bool arch_spin_is_locked(arch_spinlock_t *mutex) +{ + return true; +} + +#endif diff --git a/tools/lib/lockdep/uinclude/linux/stacktrace.h b/tools/lib/lockdep/uinclude/linux/stacktrace.h new file mode 100644 index 000000000000..39aecc6b19d1 --- /dev/null +++ b/tools/lib/lockdep/uinclude/linux/stacktrace.h @@ -0,0 +1,32 @@ +#ifndef _LIBLOCKDEP_LINUX_STACKTRACE_H_ +#define _LIBLOCKDEP_LINUX_STACKTRACE_H_ + +#include + +struct stack_trace { + unsigned int nr_entries, max_entries; + unsigned long *entries; + int skip; +}; + +static inline void print_stack_trace(struct stack_trace *trace, int spaces) +{ + backtrace_symbols_fd((void **)trace->entries, trace->nr_entries, 1); +} + +#define save_stack_trace(trace) \ + ((trace)->nr_entries = \ + backtrace((void **)(trace)->entries, (trace)->max_entries)) + +static inline int dump_stack(void) +{ + void *array[64]; + size_t size; + + size = backtrace(array, 64); + backtrace_symbols_fd(array, size, 1); + + return 0; +} + +#endif diff --git a/tools/lib/lockdep/uinclude/linux/stringify.h b/tools/lib/lockdep/uinclude/linux/stringify.h new file mode 100644 index 000000000000..05dfcd1ac118 --- /dev/null +++ b/tools/lib/lockdep/uinclude/linux/stringify.h @@ -0,0 +1,7 @@ +#ifndef _LIBLOCKDEP_LINUX_STRINGIFY_H_ +#define _LIBLOCKDEP_LINUX_STRINGIFY_H_ + +#define __stringify_1(x...) #x +#define __stringify(x...) __stringify_1(x) + +#endif diff --git a/tools/lib/lockdep/uinclude/linux/types.h b/tools/lib/lockdep/uinclude/linux/types.h new file mode 100644 index 000000000000..929938f426de --- /dev/null +++ b/tools/lib/lockdep/uinclude/linux/types.h @@ -0,0 +1,58 @@ +#ifndef _LIBLOCKDEP_LINUX_TYPES_H_ +#define _LIBLOCKDEP_LINUX_TYPES_H_ + +#include +#include + +#define __SANE_USERSPACE_TYPES__ /* For PPC64, to get LL64 types */ +#include + +struct page; +struct kmem_cache; + +typedef unsigned gfp_t; + +typedef __u64 u64; +typedef __s64 s64; + +typedef __u32 u32; +typedef __s32 s32; + +typedef __u16 u16; +typedef __s16 s16; + +typedef __u8 u8; +typedef __s8 s8; + +#ifdef __CHECKER__ +#define __bitwise__ __attribute__((bitwise)) +#else +#define __bitwise__ +#endif +#ifdef __CHECK_ENDIAN__ +#define __bitwise __bitwise__ +#else +#define __bitwise +#endif + + +typedef __u16 __bitwise __le16; +typedef __u16 __bitwise __be16; +typedef __u32 __bitwise __le32; +typedef __u32 __bitwise __be32; +typedef __u64 __bitwise __le64; +typedef __u64 __bitwise __be64; + +struct list_head { + struct list_head *next, *prev; +}; + +struct hlist_head { + struct hlist_node *first; +}; + +struct hlist_node { + struct hlist_node *next, **pprev; +}; + +#endif diff --git a/tools/lib/lockdep/uinclude/trace/events/lock.h b/tools/lib/lockdep/uinclude/trace/events/lock.h new file mode 100644 index 000000000000..fab00ff936d1 --- /dev/null +++ b/tools/lib/lockdep/uinclude/trace/events/lock.h @@ -0,0 +1,3 @@ + +/* empty file */ + From 45e6207464b59dca63c8a9a79a7befbbf6a68fdb Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 13 Jun 2013 18:41:18 -0400 Subject: [PATCH 082/981] liblockdep: Add public headers for pthread_mutex_t implementation These headers provide the same API as their pthread mutex counterparts. The design here is to allow to easily switch to liblockdep lock validation just by adding a "liblockdep_" to pthread_mutex_*() calls, which means that it's easy to integrate liblockdep into existing codebases. Signed-off-by: Sasha Levin Signed-off-by: Peter Zijlstra Cc: torvalds@linux-foundation.org Link: http://lkml.kernel.org/r/1371163284-6346-4-git-send-email-sasha.levin@oracle.com Signed-off-by: Ingo Molnar --- tools/lib/lockdep/include/liblockdep/common.h | 50 +++++++++++++ tools/lib/lockdep/include/liblockdep/mutex.h | 70 +++++++++++++++++++ 2 files changed, 120 insertions(+) create mode 100644 tools/lib/lockdep/include/liblockdep/common.h create mode 100644 tools/lib/lockdep/include/liblockdep/mutex.h diff --git a/tools/lib/lockdep/include/liblockdep/common.h b/tools/lib/lockdep/include/liblockdep/common.h new file mode 100644 index 000000000000..0bda630027c3 --- /dev/null +++ b/tools/lib/lockdep/include/liblockdep/common.h @@ -0,0 +1,50 @@ +#ifndef _LIBLOCKDEP_COMMON_H +#define _LIBLOCKDEP_COMMON_H + +#include + +#define NR_LOCKDEP_CACHING_CLASSES 2 +#define MAX_LOCKDEP_SUBCLASSES 8UL + +#ifndef CALLER_ADDR0 +#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) +#endif + +#ifndef _RET_IP_ +#define _RET_IP_ CALLER_ADDR0 +#endif + +#ifndef _THIS_IP_ +#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; }) +#endif + +struct lockdep_subclass_key { + char __one_byte; +}; + +struct lock_class_key { + struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES]; +}; + +struct lockdep_map { + struct lock_class_key *key; + struct lock_class *class_cache[NR_LOCKDEP_CACHING_CLASSES]; + const char *name; +#ifdef CONFIG_LOCK_STAT + int cpu; + unsigned long ip; +#endif +}; + +void lockdep_init_map(struct lockdep_map *lock, const char *name, + struct lock_class_key *key, int subclass); +void lock_acquire(struct lockdep_map *lock, unsigned int subclass, + int trylock, int read, int check, + struct lockdep_map *nest_lock, unsigned long ip); +void lock_release(struct lockdep_map *lock, int nested, + unsigned long ip); + +#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ + { .name = (_name), .key = (void *)(_key), } + +#endif diff --git a/tools/lib/lockdep/include/liblockdep/mutex.h b/tools/lib/lockdep/include/liblockdep/mutex.h new file mode 100644 index 000000000000..c342f7087147 --- /dev/null +++ b/tools/lib/lockdep/include/liblockdep/mutex.h @@ -0,0 +1,70 @@ +#ifndef _LIBLOCKDEP_MUTEX_H +#define _LIBLOCKDEP_MUTEX_H + +#include +#include "common.h" + +struct liblockdep_pthread_mutex { + pthread_mutex_t mutex; + struct lockdep_map dep_map; +}; + +typedef struct liblockdep_pthread_mutex liblockdep_pthread_mutex_t; + +#define LIBLOCKDEP_PTHREAD_MUTEX_INITIALIZER(mtx) \ + (const struct liblockdep_pthread_mutex) { \ + .mutex = PTHREAD_MUTEX_INITIALIZER, \ + .dep_map = STATIC_LOCKDEP_MAP_INIT(#mtx, &((&(mtx))->dep_map)), \ +} + +static inline int __mutex_init(liblockdep_pthread_mutex_t *lock, + const char *name, + struct lock_class_key *key, + const pthread_mutexattr_t *__mutexattr) +{ + lockdep_init_map(&lock->dep_map, name, key, 0); + return pthread_mutex_init(&lock->mutex, __mutexattr); +} + +#define liblockdep_pthread_mutex_init(mutex, mutexattr) \ +({ \ + static struct lock_class_key __key; \ + \ + __mutex_init((mutex), #mutex, &__key, (mutexattr)); \ +}) + +static inline int liblockdep_pthread_mutex_lock(liblockdep_pthread_mutex_t *lock) +{ + lock_acquire(&lock->dep_map, 0, 0, 0, 2, NULL, (unsigned long)_RET_IP_); + return pthread_mutex_lock(&lock->mutex); +} + +static inline int liblockdep_pthread_mutex_unlock(liblockdep_pthread_mutex_t *lock) +{ + lock_release(&lock->dep_map, 0, (unsigned long)_RET_IP_); + return pthread_mutex_unlock(&lock->mutex); +} + +static inline int liblockdep_pthread_mutex_trylock(liblockdep_pthread_mutex_t *lock) +{ + lock_acquire(&lock->dep_map, 0, 1, 0, 2, NULL, (unsigned long)_RET_IP_); + return pthread_mutex_trylock(&lock->mutex) == 0 ? 1 : 0; +} + +static inline int liblockdep_pthread_mutex_destroy(liblockdep_pthread_mutex_t *lock) +{ + return pthread_mutex_destroy(&lock->mutex); +} + +#ifdef __USE_LIBLOCKDEP + +#define pthread_mutex_t liblockdep_pthread_mutex_t +#define pthread_mutex_init liblockdep_pthread_mutex_init +#define pthread_mutex_lock liblockdep_pthread_mutex_lock +#define pthread_mutex_unlock liblockdep_pthread_mutex_unlock +#define pthread_mutex_trylock liblockdep_pthread_mutex_trylock +#define pthread_mutex_destroy liblockdep_pthread_mutex_destroy + +#endif + +#endif From 878f968eeb852383ff79dc3f181db24e5b52fd75 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 13 Jun 2013 18:41:19 -0400 Subject: [PATCH 083/981] liblockdep: Add pthread_mutex_t test suite This is a rather simple and basic test suite to test common locking issues. Beyond tests, it also shows how to use the library. Signed-off-by: Sasha Levin Signed-off-by: Peter Zijlstra Cc: torvalds@linux-foundation.org Link: http://lkml.kernel.org/r/1371163284-6346-5-git-send-email-sasha.levin@oracle.com Signed-off-by: Ingo Molnar --- tools/lib/lockdep/run_tests.sh | 27 ++++++++++++++++++++++++ tools/lib/lockdep/tests/AA.c | 13 ++++++++++++ tools/lib/lockdep/tests/ABBA.c | 13 ++++++++++++ tools/lib/lockdep/tests/ABBCCA.c | 15 +++++++++++++ tools/lib/lockdep/tests/ABBCCDDA.c | 17 +++++++++++++++ tools/lib/lockdep/tests/ABCABC.c | 15 +++++++++++++ tools/lib/lockdep/tests/ABCDBCDA.c | 17 +++++++++++++++ tools/lib/lockdep/tests/ABCDBDDA.c | 17 +++++++++++++++ tools/lib/lockdep/tests/common.h | 12 +++++++++++ tools/lib/lockdep/tests/unlock_balance.c | 12 +++++++++++ 10 files changed, 158 insertions(+) create mode 100644 tools/lib/lockdep/run_tests.sh create mode 100644 tools/lib/lockdep/tests/AA.c create mode 100644 tools/lib/lockdep/tests/ABBA.c create mode 100644 tools/lib/lockdep/tests/ABBCCA.c create mode 100644 tools/lib/lockdep/tests/ABBCCDDA.c create mode 100644 tools/lib/lockdep/tests/ABCABC.c create mode 100644 tools/lib/lockdep/tests/ABCDBCDA.c create mode 100644 tools/lib/lockdep/tests/ABCDBDDA.c create mode 100644 tools/lib/lockdep/tests/common.h create mode 100644 tools/lib/lockdep/tests/unlock_balance.c diff --git a/tools/lib/lockdep/run_tests.sh b/tools/lib/lockdep/run_tests.sh new file mode 100644 index 000000000000..5334ad9d39b7 --- /dev/null +++ b/tools/lib/lockdep/run_tests.sh @@ -0,0 +1,27 @@ +#! /bin/bash + +make &> /dev/null + +for i in `ls tests/*.c`; do + testname=$(basename -s .c "$i") + gcc -o tests/$testname -pthread -lpthread $i liblockdep.a -Iinclude -D__USE_LIBLOCKDEP &> /dev/null + echo -ne "$testname... " + if [ $(timeout 1 ./tests/$testname | wc -l) -gt 0 ]; then + echo "PASSED!" + else + echo "FAILED!" + fi + rm tests/$testname +done + +for i in `ls tests/*.c`; do + testname=$(basename -s .c "$i") + gcc -o tests/$testname -pthread -lpthread -Iinclude $i &> /dev/null + echo -ne "(PRELOAD) $testname... " + if [ $(timeout 1 ./lockdep ./tests/$testname | wc -l) -gt 0 ]; then + echo "PASSED!" + else + echo "FAILED!" + fi + rm tests/$testname +done diff --git a/tools/lib/lockdep/tests/AA.c b/tools/lib/lockdep/tests/AA.c new file mode 100644 index 000000000000..0f782ff404ac --- /dev/null +++ b/tools/lib/lockdep/tests/AA.c @@ -0,0 +1,13 @@ +#include + +void main(void) +{ + pthread_mutex_t a, b; + + pthread_mutex_init(&a, NULL); + pthread_mutex_init(&b, NULL); + + pthread_mutex_lock(&a); + pthread_mutex_lock(&b); + pthread_mutex_lock(&a); +} diff --git a/tools/lib/lockdep/tests/ABBA.c b/tools/lib/lockdep/tests/ABBA.c new file mode 100644 index 000000000000..07f0e29d5485 --- /dev/null +++ b/tools/lib/lockdep/tests/ABBA.c @@ -0,0 +1,13 @@ +#include +#include "common.h" + +void main(void) +{ + pthread_mutex_t a, b; + + pthread_mutex_init(&a, NULL); + pthread_mutex_init(&b, NULL); + + LOCK_UNLOCK_2(a, b); + LOCK_UNLOCK_2(b, a); +} diff --git a/tools/lib/lockdep/tests/ABBCCA.c b/tools/lib/lockdep/tests/ABBCCA.c new file mode 100644 index 000000000000..843db09ac666 --- /dev/null +++ b/tools/lib/lockdep/tests/ABBCCA.c @@ -0,0 +1,15 @@ +#include +#include "common.h" + +void main(void) +{ + pthread_mutex_t a, b, c; + + pthread_mutex_init(&a, NULL); + pthread_mutex_init(&b, NULL); + pthread_mutex_init(&c, NULL); + + LOCK_UNLOCK_2(a, b); + LOCK_UNLOCK_2(b, c); + LOCK_UNLOCK_2(c, a); +} diff --git a/tools/lib/lockdep/tests/ABBCCDDA.c b/tools/lib/lockdep/tests/ABBCCDDA.c new file mode 100644 index 000000000000..33620e268f85 --- /dev/null +++ b/tools/lib/lockdep/tests/ABBCCDDA.c @@ -0,0 +1,17 @@ +#include +#include "common.h" + +void main(void) +{ + pthread_mutex_t a, b, c, d; + + pthread_mutex_init(&a, NULL); + pthread_mutex_init(&b, NULL); + pthread_mutex_init(&c, NULL); + pthread_mutex_init(&d, NULL); + + LOCK_UNLOCK_2(a, b); + LOCK_UNLOCK_2(b, c); + LOCK_UNLOCK_2(c, d); + LOCK_UNLOCK_2(d, a); +} diff --git a/tools/lib/lockdep/tests/ABCABC.c b/tools/lib/lockdep/tests/ABCABC.c new file mode 100644 index 000000000000..3fee51e3a68a --- /dev/null +++ b/tools/lib/lockdep/tests/ABCABC.c @@ -0,0 +1,15 @@ +#include +#include "common.h" + +void main(void) +{ + pthread_mutex_t a, b, c; + + pthread_mutex_init(&a, NULL); + pthread_mutex_init(&b, NULL); + pthread_mutex_init(&c, NULL); + + LOCK_UNLOCK_2(a, b); + LOCK_UNLOCK_2(c, a); + LOCK_UNLOCK_2(b, c); +} diff --git a/tools/lib/lockdep/tests/ABCDBCDA.c b/tools/lib/lockdep/tests/ABCDBCDA.c new file mode 100644 index 000000000000..427ba562c75b --- /dev/null +++ b/tools/lib/lockdep/tests/ABCDBCDA.c @@ -0,0 +1,17 @@ +#include +#include "common.h" + +void main(void) +{ + pthread_mutex_t a, b, c, d; + + pthread_mutex_init(&a, NULL); + pthread_mutex_init(&b, NULL); + pthread_mutex_init(&c, NULL); + pthread_mutex_init(&d, NULL); + + LOCK_UNLOCK_2(a, b); + LOCK_UNLOCK_2(c, d); + LOCK_UNLOCK_2(b, c); + LOCK_UNLOCK_2(d, a); +} diff --git a/tools/lib/lockdep/tests/ABCDBDDA.c b/tools/lib/lockdep/tests/ABCDBDDA.c new file mode 100644 index 000000000000..680c6cf3e919 --- /dev/null +++ b/tools/lib/lockdep/tests/ABCDBDDA.c @@ -0,0 +1,17 @@ +#include +#include "common.h" + +void main(void) +{ + pthread_mutex_t a, b, c, d; + + pthread_mutex_init(&a, NULL); + pthread_mutex_init(&b, NULL); + pthread_mutex_init(&c, NULL); + pthread_mutex_init(&d, NULL); + + LOCK_UNLOCK_2(a, b); + LOCK_UNLOCK_2(c, d); + LOCK_UNLOCK_2(b, d); + LOCK_UNLOCK_2(d, a); +} diff --git a/tools/lib/lockdep/tests/common.h b/tools/lib/lockdep/tests/common.h new file mode 100644 index 000000000000..d89e94d47d86 --- /dev/null +++ b/tools/lib/lockdep/tests/common.h @@ -0,0 +1,12 @@ +#ifndef _LIBLOCKDEP_TEST_COMMON_H +#define _LIBLOCKDEP_TEST_COMMON_H + +#define LOCK_UNLOCK_2(a, b) \ + do { \ + pthread_mutex_lock(&(a)); \ + pthread_mutex_lock(&(b)); \ + pthread_mutex_unlock(&(b)); \ + pthread_mutex_unlock(&(a)); \ + } while(0) + +#endif diff --git a/tools/lib/lockdep/tests/unlock_balance.c b/tools/lib/lockdep/tests/unlock_balance.c new file mode 100644 index 000000000000..0bc62de686f7 --- /dev/null +++ b/tools/lib/lockdep/tests/unlock_balance.c @@ -0,0 +1,12 @@ +#include + +void main(void) +{ + pthread_mutex_t a; + + pthread_mutex_init(&a, NULL); + + pthread_mutex_lock(&a); + pthread_mutex_unlock(&a); + pthread_mutex_unlock(&a); +} From 5a52c9b480e09a782618dbf08de57f9ca54c8b49 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 13 Jun 2013 18:41:20 -0400 Subject: [PATCH 084/981] liblockdep: Add public headers for pthread_rwlock_t implementation Both pthreads and lockdep support dealing with rwlocks, so here's the liblockdep implementation for those. Signed-off-by: Sasha Levin Signed-off-by: Peter Zijlstra Cc: torvalds@linux-foundation.org Link: http://lkml.kernel.org/r/1371163284-6346-6-git-send-email-sasha.levin@oracle.com Signed-off-by: Ingo Molnar --- tools/lib/lockdep/include/liblockdep/rwlock.h | 86 +++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 tools/lib/lockdep/include/liblockdep/rwlock.h diff --git a/tools/lib/lockdep/include/liblockdep/rwlock.h b/tools/lib/lockdep/include/liblockdep/rwlock.h new file mode 100644 index 000000000000..a680ab8c2e36 --- /dev/null +++ b/tools/lib/lockdep/include/liblockdep/rwlock.h @@ -0,0 +1,86 @@ +#ifndef _LIBLOCKDEP_RWLOCK_H +#define _LIBLOCKDEP_RWLOCK_H + +#include +#include "common.h" + +struct liblockdep_pthread_rwlock { + pthread_rwlock_t rwlock; + struct lockdep_map dep_map; +}; + +typedef struct liblockdep_pthread_rwlock liblockdep_pthread_rwlock_t; + +#define LIBLOCKDEP_PTHREAD_RWLOCK_INITIALIZER(rwl) \ + (struct liblockdep_pthread_rwlock) { \ + .rwlock = PTHREAD_RWLOCK_INITIALIZER, \ + .dep_map = STATIC_LOCKDEP_MAP_INIT(#rwl, &((&(rwl))->dep_map)), \ +} + +static inline int __rwlock_init(liblockdep_pthread_rwlock_t *lock, + const char *name, + struct lock_class_key *key, + const pthread_rwlockattr_t *attr) +{ + lockdep_init_map(&lock->dep_map, name, key, 0); + + return pthread_rwlock_init(&lock->rwlock, attr); +} + +#define liblockdep_pthread_rwlock_init(lock, attr) \ +({ \ + static struct lock_class_key __key; \ + \ + __rwlock_init((lock), #lock, &__key, (attr)); \ +}) + +static inline int liblockdep_pthread_rwlock_rdlock(liblockdep_pthread_rwlock_t *lock) +{ + lock_acquire(&lock->dep_map, 0, 0, 2, 2, NULL, (unsigned long)_RET_IP_); + return pthread_rwlock_rdlock(&lock->rwlock); + +} + +static inline int liblockdep_pthread_rwlock_unlock(liblockdep_pthread_rwlock_t *lock) +{ + lock_release(&lock->dep_map, 0, (unsigned long)_RET_IP_); + return pthread_rwlock_unlock(&lock->rwlock); +} + +static inline int liblockdep_pthread_rwlock_wrlock(liblockdep_pthread_rwlock_t *lock) +{ + lock_acquire(&lock->dep_map, 0, 0, 0, 2, NULL, (unsigned long)_RET_IP_); + return pthread_rwlock_wrlock(&lock->rwlock); +} + +static inline int liblockdep_pthread_rwlock_tryrdlock(liblockdep_pthread_rwlock_t *lock) +{ + lock_acquire(&lock->dep_map, 0, 1, 2, 2, NULL, (unsigned long)_RET_IP_); + return pthread_rwlock_tryrdlock(&lock->rwlock) == 0 ? 1 : 0; +} + +static inline int liblockdep_pthread_rwlock_trywlock(liblockdep_pthread_rwlock_t *lock) +{ + lock_acquire(&lock->dep_map, 0, 1, 0, 2, NULL, (unsigned long)_RET_IP_); + return pthread_rwlock_trywlock(&lock->rwlock) == 0 ? 1 : 0; +} + +static inline int liblockdep_rwlock_destroy(liblockdep_pthread_rwlock_t *lock) +{ + return pthread_rwlock_destroy(&lock->rwlock); +} + +#ifdef __USE_LIBLOCKDEP + +#define pthread_rwlock_t liblockdep_pthread_rwlock_t +#define pthread_rwlock_init liblockdep_pthread_rwlock_init +#define pthread_rwlock_rdlock liblockdep_pthread_rwlock_rdlock +#define pthread_rwlock_unlock liblockdep_pthread_rwlock_unlock +#define pthread_rwlock_wrlock liblockdep_pthread_rwlock_wrlock +#define pthread_rwlock_tryrdlock liblockdep_pthread_rwlock_tryrdlock +#define pthread_rwlock_trywlock liblockdep_pthread_rwlock_trywlock +#define pthread_rwlock_destroy liblockdep_rwlock_destroy + +#endif + +#endif From dbe941827eab53194eda5cd350a4e1414f192658 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 13 Jun 2013 18:41:21 -0400 Subject: [PATCH 085/981] liblockdep: Add pthread_rwlock_t test suite A simple test to make sure we handle rwlocks correctly. Signed-off-by: Sasha Levin Signed-off-by: Peter Zijlstra Cc: torvalds@linux-foundation.org Link: http://lkml.kernel.org/r/1371163284-6346-7-git-send-email-sasha.levin@oracle.com Signed-off-by: Ingo Molnar --- tools/lib/lockdep/tests/WW.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 tools/lib/lockdep/tests/WW.c diff --git a/tools/lib/lockdep/tests/WW.c b/tools/lib/lockdep/tests/WW.c new file mode 100644 index 000000000000..d44f77d71029 --- /dev/null +++ b/tools/lib/lockdep/tests/WW.c @@ -0,0 +1,13 @@ +#include + +void main(void) +{ + pthread_rwlock_t a, b; + + pthread_rwlock_init(&a, NULL); + pthread_rwlock_init(&b, NULL); + + pthread_rwlock_wrlock(&a); + pthread_rwlock_rdlock(&b); + pthread_rwlock_wrlock(&a); +} From 231941eec8aeee4f0ac210a28e484200b20f74d8 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 13 Jun 2013 18:41:22 -0400 Subject: [PATCH 086/981] liblockdep: Support using LD_PRELOAD This allows lockdep to be used without being compiled in the original program. Usage is quite simple: LD_PRELOAD=/path/to/liblockdep.so /path/to/my/program And magically, you'll have lockdep checking in your program! Signed-off-by: Sasha Levin Signed-off-by: Peter Zijlstra Cc: torvalds@linux-foundation.org Link: http://lkml.kernel.org/r/1371163284-6346-8-git-send-email-sasha.levin@oracle.com Signed-off-by: Ingo Molnar --- tools/lib/lockdep/preload.c | 447 ++++++++++++++++++++++++++++++++++++ 1 file changed, 447 insertions(+) create mode 100644 tools/lib/lockdep/preload.c diff --git a/tools/lib/lockdep/preload.c b/tools/lib/lockdep/preload.c new file mode 100644 index 000000000000..f8465a811aa5 --- /dev/null +++ b/tools/lib/lockdep/preload.c @@ -0,0 +1,447 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include "include/liblockdep/mutex.h" +#include "../../../include/linux/rbtree.h" + +/** + * struct lock_lookup - liblockdep's view of a single unique lock + * @orig: pointer to the original pthread lock, used for lookups + * @dep_map: lockdep's dep_map structure + * @key: lockdep's key structure + * @node: rb-tree node used to store the lock in a global tree + * @name: a unique name for the lock + */ +struct lock_lookup { + void *orig; /* Original pthread lock, used for lookups */ + struct lockdep_map dep_map; /* Since all locks are dynamic, we need + * a dep_map and a key for each lock */ + /* + * Wait, there's no support for key classes? Yup :( + * Most big projects wrap the pthread api with their own calls to + * be compatible with different locking methods. This means that + * "classes" will be brokes since the function that creates all + * locks will point to a generic locking function instead of the + * actual code that wants to do the locking. + */ + struct lock_class_key key; + struct rb_node node; +#define LIBLOCKDEP_MAX_LOCK_NAME 22 + char name[LIBLOCKDEP_MAX_LOCK_NAME]; +}; + +/* This is where we store our locks */ +static struct rb_root locks = RB_ROOT; +static pthread_rwlock_t locks_rwlock = PTHREAD_RWLOCK_INITIALIZER; + +/* pthread mutex API */ + +#ifdef __GLIBC__ +extern int __pthread_mutex_init(pthread_mutex_t *mutex, const pthread_mutexattr_t *attr); +extern int __pthread_mutex_lock(pthread_mutex_t *mutex); +extern int __pthread_mutex_trylock(pthread_mutex_t *mutex); +extern int __pthread_mutex_unlock(pthread_mutex_t *mutex); +extern int __pthread_mutex_destroy(pthread_mutex_t *mutex); +#else +#define __pthread_mutex_init NULL +#define __pthread_mutex_lock NULL +#define __pthread_mutex_trylock NULL +#define __pthread_mutex_unlock NULL +#define __pthread_mutex_destroy NULL +#endif +static int (*ll_pthread_mutex_init)(pthread_mutex_t *mutex, + const pthread_mutexattr_t *attr) = __pthread_mutex_init; +static int (*ll_pthread_mutex_lock)(pthread_mutex_t *mutex) = __pthread_mutex_lock; +static int (*ll_pthread_mutex_trylock)(pthread_mutex_t *mutex) = __pthread_mutex_trylock; +static int (*ll_pthread_mutex_unlock)(pthread_mutex_t *mutex) = __pthread_mutex_unlock; +static int (*ll_pthread_mutex_destroy)(pthread_mutex_t *mutex) = __pthread_mutex_destroy; + +/* pthread rwlock API */ + +#ifdef __GLIBC__ +extern int __pthread_rwlock_init(pthread_rwlock_t *rwlock, const pthread_rwlockattr_t *attr); +extern int __pthread_rwlock_destroy(pthread_rwlock_t *rwlock); +extern int __pthread_rwlock_wrlock(pthread_rwlock_t *rwlock); +extern int __pthread_rwlock_trywrlock(pthread_rwlock_t *rwlock); +extern int __pthread_rwlock_rdlock(pthread_rwlock_t *rwlock); +extern int __pthread_rwlock_tryrdlock(pthread_rwlock_t *rwlock); +extern int __pthread_rwlock_unlock(pthread_rwlock_t *rwlock); +#else +#define __pthread_rwlock_init NULL +#define __pthread_rwlock_destroy NULL +#define __pthread_rwlock_wrlock NULL +#define __pthread_rwlock_trywrlock NULL +#define __pthread_rwlock_rdlock NULL +#define __pthread_rwlock_tryrdlock NULL +#define __pthread_rwlock_unlock NULL +#endif + +static int (*ll_pthread_rwlock_init)(pthread_rwlock_t *rwlock, + const pthread_rwlockattr_t *attr) = __pthread_rwlock_init; +static int (*ll_pthread_rwlock_destroy)(pthread_rwlock_t *rwlock) = __pthread_rwlock_destroy; +static int (*ll_pthread_rwlock_rdlock)(pthread_rwlock_t *rwlock) = __pthread_rwlock_rdlock; +static int (*ll_pthread_rwlock_tryrdlock)(pthread_rwlock_t *rwlock) = __pthread_rwlock_tryrdlock; +static int (*ll_pthread_rwlock_trywrlock)(pthread_rwlock_t *rwlock) = __pthread_rwlock_trywrlock; +static int (*ll_pthread_rwlock_wrlock)(pthread_rwlock_t *rwlock) = __pthread_rwlock_wrlock; +static int (*ll_pthread_rwlock_unlock)(pthread_rwlock_t *rwlock) = __pthread_rwlock_unlock; + +enum { none, prepare, done, } __init_state; +static void init_preload(void); +static void try_init_preload(void) +{ + if (!__init_state != done) + init_preload(); +} + +static struct rb_node **__get_lock_node(void *lock, struct rb_node **parent) +{ + struct rb_node **node = &locks.rb_node; + struct lock_lookup *l; + + *parent = NULL; + + while (*node) { + l = rb_entry(*node, struct lock_lookup, node); + + *parent = *node; + if (lock < l->orig) + node = &l->node.rb_left; + else if (lock > l->orig) + node = &l->node.rb_right; + else + return node; + } + + return node; +} + +#ifndef LIBLOCKDEP_STATIC_ENTRIES +#define LIBLOCKDEP_STATIC_ENTRIES 1024 +#endif + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +static struct lock_lookup __locks[LIBLOCKDEP_STATIC_ENTRIES]; +static int __locks_nr; + +static inline bool is_static_lock(struct lock_lookup *lock) +{ + return lock >= __locks && lock < __locks + ARRAY_SIZE(__locks); +} + +static struct lock_lookup *alloc_lock(void) +{ + if (__init_state != done) { + /* + * Some programs attempt to initialize and use locks in their + * allocation path. This means that a call to malloc() would + * result in locks being initialized and locked. + * + * Why is it an issue for us? dlsym() below will try allocating + * to give us the original function. Since this allocation will + * result in a locking operations, we have to let pthread deal + * with it, but we can't! we don't have the pointer to the + * original API since we're inside dlsym() trying to get it + */ + + int idx = __locks_nr++; + if (idx >= ARRAY_SIZE(__locks)) { + fprintf(stderr, + "LOCKDEP error: insufficient LIBLOCKDEP_STATIC_ENTRIES\n"); + exit(EX_UNAVAILABLE); + } + return __locks + idx; + } + + return malloc(sizeof(struct lock_lookup)); +} + +static inline void free_lock(struct lock_lookup *lock) +{ + if (likely(!is_static_lock(lock))) + free(lock); +} + +/** + * __get_lock - find or create a lock instance + * @lock: pointer to a pthread lock function + * + * Try to find an existing lock in the rbtree using the provided pointer. If + * one wasn't found - create it. + */ +static struct lock_lookup *__get_lock(void *lock) +{ + struct rb_node **node, *parent; + struct lock_lookup *l; + + ll_pthread_rwlock_rdlock(&locks_rwlock); + node = __get_lock_node(lock, &parent); + ll_pthread_rwlock_unlock(&locks_rwlock); + if (*node) { + return rb_entry(*node, struct lock_lookup, node); + } + + /* We didn't find the lock, let's create it */ + l = alloc_lock(); + if (l == NULL) + return NULL; + + l->orig = lock; + /* + * Currently the name of the lock is the ptr value of the pthread lock, + * while not optimal, it makes debugging a bit easier. + * + * TODO: Get the real name of the lock using libdwarf + */ + sprintf(l->name, "%p", lock); + lockdep_init_map(&l->dep_map, l->name, &l->key, 0); + + ll_pthread_rwlock_wrlock(&locks_rwlock); + /* This might have changed since the last time we fetched it */ + node = __get_lock_node(lock, &parent); + rb_link_node(&l->node, parent, node); + rb_insert_color(&l->node, &locks); + ll_pthread_rwlock_unlock(&locks_rwlock); + + return l; +} + +static void __del_lock(struct lock_lookup *lock) +{ + ll_pthread_rwlock_wrlock(&locks_rwlock); + rb_erase(&lock->node, &locks); + ll_pthread_rwlock_unlock(&locks_rwlock); + free_lock(lock); +} + +int pthread_mutex_init(pthread_mutex_t *mutex, + const pthread_mutexattr_t *attr) +{ + int r; + + /* + * We keep trying to init our preload module because there might be + * code in init sections that tries to touch locks before we are + * initialized, in that case we'll need to manually call preload + * to get us going. + * + * Funny enough, kernel's lockdep had the same issue, and used + * (almost) the same solution. See look_up_lock_class() in + * kernel/locking/lockdep.c for details. + */ + try_init_preload(); + + r = ll_pthread_mutex_init(mutex, attr); + if (r == 0) + /* + * We do a dummy initialization here so that lockdep could + * warn us if something fishy is going on - such as + * initializing a held lock. + */ + __get_lock(mutex); + + return r; +} + +int pthread_mutex_lock(pthread_mutex_t *mutex) +{ + int r; + + try_init_preload(); + + lock_acquire(&__get_lock(mutex)->dep_map, 0, 0, 0, 2, NULL, + (unsigned long)_RET_IP_); + /* + * Here's the thing with pthread mutexes: unlike the kernel variant, + * they can fail. + * + * This means that the behaviour here is a bit different from what's + * going on in the kernel: there we just tell lockdep that we took the + * lock before actually taking it, but here we must deal with the case + * that locking failed. + * + * To do that we'll "release" the lock if locking failed - this way + * we'll get lockdep doing the correct checks when we try to take + * the lock, and if that fails - we'll be back to the correct + * state by releasing it. + */ + r = ll_pthread_mutex_lock(mutex); + if (r) + lock_release(&__get_lock(mutex)->dep_map, 0, (unsigned long)_RET_IP_); + + return r; +} + +int pthread_mutex_trylock(pthread_mutex_t *mutex) +{ + int r; + + try_init_preload(); + + lock_acquire(&__get_lock(mutex)->dep_map, 0, 1, 0, 2, NULL, (unsigned long)_RET_IP_); + r = ll_pthread_mutex_trylock(mutex); + if (r) + lock_release(&__get_lock(mutex)->dep_map, 0, (unsigned long)_RET_IP_); + + return r; +} + +int pthread_mutex_unlock(pthread_mutex_t *mutex) +{ + int r; + + try_init_preload(); + + lock_release(&__get_lock(mutex)->dep_map, 0, (unsigned long)_RET_IP_); + /* + * Just like taking a lock, only in reverse! + * + * If we fail releasing the lock, tell lockdep we're holding it again. + */ + r = ll_pthread_mutex_unlock(mutex); + if (r) + lock_acquire(&__get_lock(mutex)->dep_map, 0, 0, 0, 2, NULL, (unsigned long)_RET_IP_); + + return r; +} + +int pthread_mutex_destroy(pthread_mutex_t *mutex) +{ + try_init_preload(); + + /* + * Let's see if we're releasing a lock that's held. + * + * TODO: Hook into free() and add that check there as well. + */ + debug_check_no_locks_freed(mutex, mutex + sizeof(*mutex)); + __del_lock(__get_lock(mutex)); + return ll_pthread_mutex_destroy(mutex); +} + +/* This is the rwlock part, very similar to what happened with mutex above */ +int pthread_rwlock_init(pthread_rwlock_t *rwlock, + const pthread_rwlockattr_t *attr) +{ + int r; + + try_init_preload(); + + r = ll_pthread_rwlock_init(rwlock, attr); + if (r == 0) + __get_lock(rwlock); + + return r; +} + +int pthread_rwlock_destroy(pthread_rwlock_t *rwlock) +{ + try_init_preload(); + + debug_check_no_locks_freed(rwlock, rwlock + sizeof(*rwlock)); + __del_lock(__get_lock(rwlock)); + return ll_pthread_rwlock_destroy(rwlock); +} + +int pthread_rwlock_rdlock(pthread_rwlock_t *rwlock) +{ + int r; + + init_preload(); + + lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 2, 2, NULL, (unsigned long)_RET_IP_); + r = ll_pthread_rwlock_rdlock(rwlock); + if (r) + lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_); + + return r; +} + +int pthread_rwlock_tryrdlock(pthread_rwlock_t *rwlock) +{ + int r; + + init_preload(); + + lock_acquire(&__get_lock(rwlock)->dep_map, 0, 1, 2, 2, NULL, (unsigned long)_RET_IP_); + r = ll_pthread_rwlock_tryrdlock(rwlock); + if (r) + lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_); + + return r; +} + +int pthread_rwlock_trywrlock(pthread_rwlock_t *rwlock) +{ + int r; + + init_preload(); + + lock_acquire(&__get_lock(rwlock)->dep_map, 0, 1, 0, 2, NULL, (unsigned long)_RET_IP_); + r = ll_pthread_rwlock_trywrlock(rwlock); + if (r) + lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_); + + return r; +} + +int pthread_rwlock_wrlock(pthread_rwlock_t *rwlock) +{ + int r; + + init_preload(); + + lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 0, 2, NULL, (unsigned long)_RET_IP_); + r = ll_pthread_rwlock_wrlock(rwlock); + if (r) + lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_); + + return r; +} + +int pthread_rwlock_unlock(pthread_rwlock_t *rwlock) +{ + int r; + + init_preload(); + + lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_); + r = ll_pthread_rwlock_unlock(rwlock); + if (r) + lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 0, 2, NULL, (unsigned long)_RET_IP_); + + return r; +} + +__attribute__((constructor)) static void init_preload(void) +{ + if (__init_state != done) + return; + +#ifndef __GLIBC__ + __init_state = prepare; + + ll_pthread_mutex_init = dlsym(RTLD_NEXT, "pthread_mutex_init"); + ll_pthread_mutex_lock = dlsym(RTLD_NEXT, "pthread_mutex_lock"); + ll_pthread_mutex_trylock = dlsym(RTLD_NEXT, "pthread_mutex_trylock"); + ll_pthread_mutex_unlock = dlsym(RTLD_NEXT, "pthread_mutex_unlock"); + ll_pthread_mutex_destroy = dlsym(RTLD_NEXT, "pthread_mutex_destroy"); + + ll_pthread_rwlock_init = dlsym(RTLD_NEXT, "pthread_rwlock_init"); + ll_pthread_rwlock_destroy = dlsym(RTLD_NEXT, "pthread_rwlock_destroy"); + ll_pthread_rwlock_rdlock = dlsym(RTLD_NEXT, "pthread_rwlock_rdlock"); + ll_pthread_rwlock_tryrdlock = dlsym(RTLD_NEXT, "pthread_rwlock_tryrdlock"); + ll_pthread_rwlock_wrlock = dlsym(RTLD_NEXT, "pthread_rwlock_wrlock"); + ll_pthread_rwlock_trywrlock = dlsym(RTLD_NEXT, "pthread_rwlock_trywrlock"); + ll_pthread_rwlock_unlock = dlsym(RTLD_NEXT, "pthread_rwlock_unlock"); +#endif + + printf("%p\n", ll_pthread_mutex_trylock);fflush(stdout); + + lockdep_init(); + + __init_state = done; +} From f612ac05b7ce66919507d25f4c81e4272f7a8705 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 13 Jun 2013 18:41:23 -0400 Subject: [PATCH 087/981] liblockdep: Add the 'lockdep' user-space utility This is a simple wrapper to make using liblockdep on existing applications much easier. After running 'make && make install', it becomes quite simple to test things with liblockdep. For example, to try it on perf: lockdep perf No other integration required. Signed-off-by: Sasha Levin Signed-off-by: Peter Zijlstra Cc: torvalds@linux-foundation.org Link: http://lkml.kernel.org/r/1371163284-6346-9-git-send-email-sasha.levin@oracle.com [ Changed it to load ./liblockdep.so, so it can be tested in situ. ] Signed-off-by: Ingo Molnar --- tools/lib/lockdep/lockdep | 3 +++ 1 file changed, 3 insertions(+) create mode 100755 tools/lib/lockdep/lockdep diff --git a/tools/lib/lockdep/lockdep b/tools/lib/lockdep/lockdep new file mode 100755 index 000000000000..49af9fe19f5b --- /dev/null +++ b/tools/lib/lockdep/lockdep @@ -0,0 +1,3 @@ +#!/bin/bash + +LD_PRELOAD="./liblockdep.so $LD_PRELOAD" "$@" From 1acd437c59c7dd78d8f1e5f07bea6a18b7b5cd77 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 13 Jun 2013 18:41:24 -0400 Subject: [PATCH 088/981] liblockdep: Add a MAINTAINERS entry Signed-off-by: Sasha Levin Signed-off-by: Peter Zijlstra Cc: torvalds@linux-foundation.org Link: http://lkml.kernel.org/r/1371163284-6346-10-git-send-email-sasha.levin@oracle.com Signed-off-by: Ingo Molnar --- MAINTAINERS | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 63f30484932b..9e62574cca4a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5111,6 +5111,11 @@ F: drivers/lguest/ F: include/linux/lguest*.h F: tools/lguest/ +LIBLOCKDEP +M: Sasha Levin +S: Maintained +F: tools/lib/lockdep/ + LINUX FOR IBM pSERIES (RS/6000) M: Paul Mackerras W: http://www.ibm.com/linux/ltc/projects/ppc From bb8cbbfee68518796df4050868e5b0f5ad078f9f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 13 Nov 2013 15:36:12 +0100 Subject: [PATCH 089/981] tasks/fork: Remove unnecessary child->exit_state A zombie task obviously can't fork(), remove the unnecessary initialization of child->exit_state. It is zero anyway after dup_task_struct(). Note: copy_process() is huge and it has a lot of chaotic initializations, probably it makes sense to move them into the new helper called by dup_task_struct(). Signed-off-by: Oleg Nesterov Cc: David Laight Cc: Geert Uytterhoeven Cc: Tejun Heo Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20131113143612.GA10540@redhat.com Signed-off-by: Ingo Molnar --- kernel/fork.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 728d5be9548c..b3080823a24d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1402,13 +1402,11 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->tgid = p->pid; } - p->pdeath_signal = 0; - p->exit_state = 0; - p->nr_dirtied = 0; p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10); p->dirty_paused_when = 0; + p->pdeath_signal = 0; INIT_LIST_HEAD(&p->thread_group); p->task_works = NULL; From 86506a99a62400e9f7b7d1344bcc9ea235faf98f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 13 Nov 2013 15:36:14 +0100 Subject: [PATCH 090/981] tasks/exit: Remove unused task_is_dead() method task_is_dead() has no users since commit 43e13cc107cf ("cred: remove task_is_dead() from __task_cred() validation"), and nobody except exit.c should rely on ->exit_state (we still have the users which should be changed). Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra Cc: David Laight Cc: Geert Uytterhoeven Cc: Tejun Heo Cc: Andrew Morton Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20131113143614.GA10547@redhat.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 7e35d4b9e14a..cda8d634bc47 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -168,7 +168,6 @@ extern char ___assert_task_state[1 - 2*!!( #define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) #define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0) -#define task_is_dead(task) ((task)->exit_state != 0) #define task_is_stopped_or_traced(task) \ ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) #define task_contributes_to_load(task) \ From 192301e70af3f6803c6354a464ebfa742da738ae Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 13 Nov 2013 16:45:38 +0100 Subject: [PATCH 091/981] sched: Check TASK_DEAD rather than EXIT_DEAD in schedule_debug() schedule_debug() ignores in_atomic() if prev->exit_state != 0. This is not what we want, ->exit_state is set by exit_notify() but we should complain until the task does the last schedule() in TASK_DEAD. See also 7407251a0e2e "PF_DEAD cleanup", I think this ancient commit explains why schedule() had to rely on ->exit_state, until that commit exit_notify() disabled preemption and set PF_DEAD which was used to detect the exiting task. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra Cc: David Laight Cc: Geert Uytterhoeven Cc: Tejun Heo Cc: Andrew Morton Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20131113154538.GB15810@redhat.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 687985b0284e..19db8f3b0e3b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2414,10 +2414,10 @@ static inline void schedule_debug(struct task_struct *prev) { /* * Test if we are atomic. Since do_exit() needs to call into - * schedule() atomically, we ignore that path for now. - * Otherwise, whine if we are scheduling when we should not be. + * schedule() atomically, we ignore that path. Otherwise whine + * if we are scheduling when we should not. */ - if (unlikely(in_atomic_preempt_off() && !prev->exit_state)) + if (unlikely(in_atomic_preempt_off() && prev->state != TASK_DEAD)) __schedule_bug(prev); rcu_sleep_check(); From c44f2a020072d75d6b0cbf9f139a09719cda9367 Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Fri, 18 Oct 2013 13:52:21 +0200 Subject: [PATCH 092/981] sched/fair: Move load idx selection in find_idlest_group load_idx is used in find_idlest_group but initialized in select_task_rq_fair even when not used. The load_idx initialisation is moved in find_idlest_group and the sd_flag replaces it in the function's args. Signed-off-by: Vincent Guittot Cc: len.brown@intel.com Cc: amit.kucheria@linaro.org Cc: pjt@google.com Cc: l.majewski@samsung.com Cc: Morten.Rasmussen@arm.com Cc: cmetcalf@tilera.com Cc: tony.luck@intel.com Cc: alex.shi@intel.com Cc: preeti@linux.vnet.ibm.com Cc: linaro-kernel@lists.linaro.org Cc: rjw@sisk.pl Cc: paulmck@linux.vnet.ibm.com Cc: corbet@lwn.net Cc: arjan@linux.intel.com Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1382097147-30088-8-git-send-email-vincent.guittot@linaro.org Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index e8b652ebe027..6cb36c7ea391 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4110,12 +4110,16 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync) */ static struct sched_group * find_idlest_group(struct sched_domain *sd, struct task_struct *p, - int this_cpu, int load_idx) + int this_cpu, int sd_flag) { struct sched_group *idlest = NULL, *group = sd->groups; unsigned long min_load = ULONG_MAX, this_load = 0; + int load_idx = sd->forkexec_idx; int imbalance = 100 + (sd->imbalance_pct-100)/2; + if (sd_flag & SD_BALANCE_WAKE) + load_idx = sd->wake_idx; + do { unsigned long load, avg_load; int local_group; @@ -4283,7 +4287,6 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f } while (sd) { - int load_idx = sd->forkexec_idx; struct sched_group *group; int weight; @@ -4292,10 +4295,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f continue; } - if (sd_flag & SD_BALANCE_WAKE) - load_idx = sd->wake_idx; - - group = find_idlest_group(sd, p, cpu, load_idx); + group = find_idlest_group(sd, p, cpu, sd_flag); if (!group) { sd = sd->child; continue; From 380c9077b38df2962a22f00f21f6cd0db62d3390 Mon Sep 17 00:00:00 2001 From: Kamalesh Babulal Date: Fri, 15 Nov 2013 15:06:52 +0530 Subject: [PATCH 093/981] sched/fair: Clean up update_sg_lb_stats() a bit Add rq->nr_running to sgs->sum_nr_running directly instead of assigning it through an intermediate variable nr_running. Signed-off-by: Kamalesh Babulal Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1384508212-25032-1-git-send-email-kamalesh@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 6cb36c7ea391..a566c0739f77 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5500,7 +5500,6 @@ static inline void update_sg_lb_stats(struct lb_env *env, struct sched_group *group, int load_idx, int local_group, struct sg_lb_stats *sgs) { - unsigned long nr_running; unsigned long load; int i; @@ -5509,8 +5508,6 @@ static inline void update_sg_lb_stats(struct lb_env *env, for_each_cpu_and(i, sched_group_cpus(group), env->cpus) { struct rq *rq = cpu_rq(i); - nr_running = rq->nr_running; - /* Bias balancing toward cpus of our domain */ if (local_group) load = target_load(i, load_idx); @@ -5518,7 +5515,7 @@ static inline void update_sg_lb_stats(struct lb_env *env, load = source_load(i, load_idx); sgs->group_load += load; - sgs->sum_nr_running += nr_running; + sgs->sum_nr_running += rq->nr_running; #ifdef CONFIG_NUMA_BALANCING sgs->nr_numa_running += rq->nr_numa_running; sgs->nr_preferred_running += rq->nr_preferred_running; From e6c390f2dfd04c165ce45b0032f73fba85b1f282 Mon Sep 17 00:00:00 2001 From: Dario Faggioli Date: Thu, 7 Nov 2013 14:43:35 +0100 Subject: [PATCH 094/981] sched: Add sched_class->task_dead() method Add a new function to the scheduling class interface. It is called at the end of a context switch, if the prev task is in TASK_DEAD state. It will be useful for the scheduling classes that want to be notified when one of their tasks dies, e.g. to perform some cleanup actions, such as SCHED_DEADLINE. Signed-off-by: Dario Faggioli Reviewed-by: Paul Turner Signed-off-by: Juri Lelli Cc: bruce.ashfield@windriver.com Cc: claudio@evidence.eu.com Cc: darren@dvhart.com Cc: dhaval.giani@gmail.com Cc: fchecconi@gmail.com Cc: fweisbec@gmail.com Cc: harald.gustafsson@ericsson.com Cc: hgu1972@gmail.com Cc: insop.song@gmail.com Cc: jkacur@redhat.com Cc: johan.eker@ericsson.com Cc: liming.wang@windriver.com Cc: luca.abeni@unitn.it Cc: michael@amarulasolutions.com Cc: nicola.manica@disi.unitn.it Cc: oleg@redhat.com Cc: paulmck@linux.vnet.ibm.com Cc: p.faure@akatech.ch Cc: rostedt@goodmis.org Cc: tommaso.cucinotta@sssup.it Cc: vincent.guittot@linaro.org Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1383831828-15501-2-git-send-email-juri.lelli@gmail.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 3 +++ kernel/sched/sched.h | 1 + 2 files changed, 4 insertions(+) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 19db8f3b0e3b..25b377986547 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2003,6 +2003,9 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) if (unlikely(prev_state == TASK_DEAD)) { task_numa_free(prev); + if (prev->sched_class->task_dead) + prev->sched_class->task_dead(prev); + /* * Remove function-return probe instances associated with this * task and put them back on the free list. diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 88c85b21d633..b3b4a4953efc 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1023,6 +1023,7 @@ struct sched_class { void (*set_curr_task) (struct rq *rq); void (*task_tick) (struct rq *rq, struct task_struct *p, int queued); void (*task_fork) (struct task_struct *p); + void (*task_dead) (struct task_struct *p); void (*switched_from) (struct rq *this_rq, struct task_struct *task); void (*switched_to) (struct rq *this_rq, struct task_struct *task); From 65661f96d3b32f4b28fef26d21be81d7e173b965 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 12 Nov 2013 17:58:51 +0100 Subject: [PATCH 095/981] perf/x86: Add RAPL hrtimer support The RAPL PMU counters do not interrupt on overflow. Therefore, the kernel needs to poll the counters to avoid missing an overflow. This patch adds the hrtimer code to do this. The timer interval is calculated at boot time based on the power unit used by the HW. There is one hrtimer per-cpu to handle the case of multiple simultaneous use across cores on the same package + hotplug CPU. Thanks to Maria Dimakopoulou for her contributions to this patch especially on the math aspects. Signed-off-by: Stephane Eranian Reviewed-by: Maria Dimakopoulou Reviewed-by: Andi Kleen [ Applied 32-bit build fix. ] Signed-off-by: Peter Zijlstra Cc: acme@redhat.com Cc: jolsa@redhat.com Cc: zheng.z.yan@intel.com Cc: bp@alien8.de Cc: maria.n.dimakopoulou@gmail.com Link: http://lkml.kernel.org/r/1384275531-10892-5-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_rapl.c | 74 ++++++++++++++++++++- 1 file changed, 72 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c index cfcd386b5d89..bf8e4a736d48 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c +++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c @@ -96,6 +96,8 @@ struct rapl_pmu { int n_active; /* number of active events */ struct list_head active_list; struct pmu *pmu; /* pointer to rapl_pmu_class */ + ktime_t timer_interval; /* in ktime_t unit */ + struct hrtimer hrtimer; }; static struct pmu rapl_pmu_class; @@ -158,6 +160,48 @@ again: return new_raw_count; } +static void rapl_start_hrtimer(struct rapl_pmu *pmu) +{ + __hrtimer_start_range_ns(&pmu->hrtimer, + pmu->timer_interval, 0, + HRTIMER_MODE_REL_PINNED, 0); +} + +static void rapl_stop_hrtimer(struct rapl_pmu *pmu) +{ + hrtimer_cancel(&pmu->hrtimer); +} + +static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer) +{ + struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu); + struct perf_event *event; + unsigned long flags; + + if (!pmu->n_active) + return HRTIMER_NORESTART; + + spin_lock_irqsave(&pmu->lock, flags); + + list_for_each_entry(event, &pmu->active_list, active_entry) { + rapl_event_update(event); + } + + spin_unlock_irqrestore(&pmu->lock, flags); + + hrtimer_forward_now(hrtimer, pmu->timer_interval); + + return HRTIMER_RESTART; +} + +static void rapl_hrtimer_init(struct rapl_pmu *pmu) +{ + struct hrtimer *hr = &pmu->hrtimer; + + hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hr->function = rapl_hrtimer_handle; +} + static void __rapl_pmu_event_start(struct rapl_pmu *pmu, struct perf_event *event) { @@ -171,6 +215,8 @@ static void __rapl_pmu_event_start(struct rapl_pmu *pmu, local64_set(&event->hw.prev_count, rapl_read_counter(event)); pmu->n_active++; + if (pmu->n_active == 1) + rapl_start_hrtimer(pmu); } static void rapl_pmu_event_start(struct perf_event *event, int mode) @@ -195,6 +241,8 @@ static void rapl_pmu_event_stop(struct perf_event *event, int mode) if (!(hwc->state & PERF_HES_STOPPED)) { WARN_ON_ONCE(pmu->n_active <= 0); pmu->n_active--; + if (pmu->n_active == 0) + rapl_stop_hrtimer(pmu); list_del(&event->active_entry); @@ -423,6 +471,9 @@ static void rapl_cpu_exit(int cpu) */ if (target >= 0) perf_pmu_migrate_context(pmu->pmu, cpu, target); + + /* cancel overflow polling timer for CPU */ + rapl_stop_hrtimer(pmu); } static void rapl_cpu_init(int cpu) @@ -442,6 +493,7 @@ static int rapl_cpu_prepare(int cpu) { struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu); int phys_id = topology_physical_package_id(cpu); + u64 ms; if (pmu) return 0; @@ -466,6 +518,22 @@ static int rapl_cpu_prepare(int cpu) pmu->hw_unit = (pmu->hw_unit >> 8) & 0x1FULL; pmu->pmu = &rapl_pmu_class; + /* + * use reference of 200W for scaling the timeout + * to avoid missing counter overflows. + * 200W = 200 Joules/sec + * divide interval by 2 to avoid lockstep (2 * 100) + * if hw unit is 32, then we use 2 ms 1/200/2 + */ + if (pmu->hw_unit < 32) + ms = (1000 / (2 * 100)) * (1ULL << (32 - pmu->hw_unit - 1)); + else + ms = 2; + + pmu->timer_interval = ms_to_ktime(ms); + + rapl_hrtimer_init(pmu); + /* set RAPL pmu for this cpu for now */ per_cpu(rapl_pmu, cpu) = pmu; per_cpu(rapl_pmu_to_free, cpu) = NULL; @@ -580,9 +648,11 @@ static int __init rapl_pmu_init(void) pr_info("RAPL PMU detected, hw unit 2^-%d Joules," " API unit is 2^-32 Joules," - " %d fixed counters\n", + " %d fixed counters" + " %llu ms ovfl timer\n", pmu->hw_unit, - hweight32(rapl_cntr_mask)); + hweight32(rapl_cntr_mask), + ktime_to_ms(pmu->timer_interval)); put_online_cpus(); From 12e55569a244996a23cb401e8116e5a060b664f0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 19 Nov 2013 18:29:37 -0500 Subject: [PATCH 096/981] tools lib traceevent: Use helper trace-seq in print functions like kernel does Jiri Olsa reported that his plugin for scsi was chopping off part of the output. Investigating this, I found that Jiri used the same functions as what is in the kernel, which adds the following: trace_seq_putc(p, 0); This adds a '\0' to the output string. The reason this works in the kernel is that the "p" that is passed to the function helper is a temporary trace_seq. But in the libtraceevent library, it's the pointer to the trace_seq used to output. By adding the '\0', it truncates the line and nothing added after that will be printed. We can solve this in two ways. One is to have the helper functions for the library not add the unnecessary '\0'. The other is to change the library to also use a helper trace_seq structure that gets copied to the main trace_seq just like the kernel does. The latter allows the helper functions in the plugins to be the same as the kernel, which is the better solution. Signed-off-by: Steven Rostedt Reported-by: Jiri Olsa Tested-by: Jiri Olsa Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20131119182937.401668e3@gandalf.local.home Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 217c82ee3665..900fca01bdd3 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -4099,6 +4099,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event unsigned long long val; struct func_map *func; const char *saveptr; + struct trace_seq p; char *bprint_fmt = NULL; char format[32]; int show_func; @@ -4306,8 +4307,12 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event format[len] = 0; if (!len_as_arg) len_arg = -1; - print_str_arg(s, data, size, event, + /* Use helper trace_seq */ + trace_seq_init(&p); + print_str_arg(&p, data, size, event, format, len_arg, arg); + trace_seq_terminate(&p); + trace_seq_puts(s, p.buffer); arg = arg->next; break; default: From 15e65c693d7de4c83d1e97fd89f87d47f2219782 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 14 Nov 2013 18:43:30 +0200 Subject: [PATCH 097/981] perf trace: Remove thread summary coloring Thread summary line coloring looks ugly. It doesn't add much value so remove coloring completely. Signed-off-by: Pekka Enberg Acked-by: David Ahern Link: http://lkml.kernel.org/r/1384447410-1771-1-git-send-email-penberg@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 8be17fc462ba..e9f345e2551a 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2158,7 +2158,6 @@ static int trace__fprintf_one_thread(struct thread *thread, void *priv) size_t printed = data->printed; struct trace *trace = data->trace; struct thread_trace *ttrace = thread->priv; - const char *color; double ratio; if (ttrace == NULL) @@ -2166,17 +2165,9 @@ static int trace__fprintf_one_thread(struct thread *thread, void *priv) ratio = (double)ttrace->nr_events / trace->nr_events * 100.0; - color = PERF_COLOR_NORMAL; - if (ratio > 50.0) - color = PERF_COLOR_RED; - else if (ratio > 25.0) - color = PERF_COLOR_GREEN; - else if (ratio > 5.0) - color = PERF_COLOR_YELLOW; - - printed += color_fprintf(fp, color, " %s (%d), ", thread__comm_str(thread), thread->tid); + printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid); printed += fprintf(fp, "%lu events, ", ttrace->nr_events); - printed += color_fprintf(fp, color, "%.1f%%", ratio); + printed += fprintf(fp, "%.1f%%", ratio); printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms); printed += thread__dump_stats(ttrace, trace, fp); From bf80669e4f689f181f23a54dfe2a0f264147ad67 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 14 Nov 2013 20:51:30 -0700 Subject: [PATCH 098/981] perf top: Make -g refer to callchains In most commands -g is used for callchains. Make perf-top follow suit. Move group to just --group with no short cut making it similar to perf-record. Signed-off-by: David Ahern Acked-by: Ingo Molnar Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1384487490-6865-1-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-top.txt | 5 ++--- tools/perf/builtin-top.c | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 7de01dd79688..cdd8d4946dba 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -50,7 +50,6 @@ Default is to monitor all CPUS. --count-filter=:: Only display functions with more events than this. --g:: --group:: Put the counters into a counter group. @@ -143,12 +142,12 @@ Default is to monitor all CPUS. --asm-raw:: Show raw instruction encoding of assembly instructions. --G:: +-g:: Enables call-graph (stack chain/backtrace) recording. --call-graph:: Setup and enable call-graph (stack chain/backtrace) recording, - implies -G. + implies -g. --max-stack:: Set the stack depth limit when parsing the callchain, anything diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 71e6402729a8..531522d3d97b 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1084,7 +1084,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) "dump the symbol table used for profiling"), OPT_INTEGER('f', "count-filter", &top.count_filter, "only display functions with more events than this"), - OPT_BOOLEAN('g', "group", &opts->group, + OPT_BOOLEAN(0, "group", &opts->group, "put the counters into a counter group"), OPT_BOOLEAN('i', "no-inherit", &opts->no_inherit, "child tasks do not inherit counters"), @@ -1105,7 +1105,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) " abort, in_tx, transaction"), OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples, "Show a column with the number of samples"), - OPT_CALLBACK_NOOPT('G', NULL, &top.record_opts, + OPT_CALLBACK_NOOPT('g', NULL, &top.record_opts, NULL, "enables call-graph recording", &callchain_opt), OPT_CALLBACK(0, "call-graph", &top.record_opts, From 2cf025e69543f5f4aa68e8549d60680515fef5ad Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Sun, 17 Nov 2013 21:43:23 +0530 Subject: [PATCH 099/981] perf completion: Introduce a layer of indirection Define the variables cur, words, cword, and prev outside the main completion function so that we have a chance to override it when we introduce zsh support. Signed-off-by: Ramkumar Ramachandra Cc: Ingo Molnar Link: http://lkml.kernel.org/r/1384704807-15779-2-git-send-email-artagnon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bash_completion | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/tools/perf/bash_completion b/tools/perf/bash_completion index 62e157db2e2b..3efdc84f112c 100644 --- a/tools/perf/bash_completion +++ b/tools/perf/bash_completion @@ -89,15 +89,12 @@ __ltrim_colon_completions() fi } -type perf &>/dev/null && -_perf() +__perf_main () { - local cur words cword prev cmd - - COMPREPLY=() - _get_comp_words_by_ref -n =: cur words cword prev + local cmd cmd=${words[0]} + COMPREPLY=() # List perf subcommands or long options if [ $cword -eq 1 ]; then @@ -120,6 +117,14 @@ _perf() opts=$($cmd $subcmd --list-opts) COMPREPLY=( $( compgen -W '$opts' -- "$cur" ) ) fi +} + +type perf &>/dev/null && +_perf() +{ + local cur words cword prev + _get_comp_words_by_ref -n =: cur words cword prev + __perf_main } && complete -o bashdefault -o default -o nospace -F _perf perf 2>/dev/null \ From 12f9dd5042483698c74a133d9004ff1d1a6474f9 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Sun, 17 Nov 2013 21:43:24 +0530 Subject: [PATCH 100/981] perf completion: Factor out compgen stuff compgen is a bash-builtin; factor out the invocations into a separate function to give us a chance to override it with a zsh equivalent in future patches. Signed-off-by: Ramkumar Ramachandra Cc: Ingo Molnar Link: http://lkml.kernel.org/r/1384704807-15779-3-git-send-email-artagnon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bash_completion | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/tools/perf/bash_completion b/tools/perf/bash_completion index 3efdc84f112c..824312681601 100644 --- a/tools/perf/bash_completion +++ b/tools/perf/bash_completion @@ -89,6 +89,11 @@ __ltrim_colon_completions() fi } +__perfcomp () +{ + COMPREPLY=( $( compgen -W "$1" -- "$2" ) ) +} + __perf_main () { local cmd @@ -99,23 +104,23 @@ __perf_main () # List perf subcommands or long options if [ $cword -eq 1 ]; then if [[ $cur == --* ]]; then - COMPREPLY=( $( compgen -W '--help --version \ + __perfcomp '--help --version \ --exec-path --html-path --paginate --no-pager \ - --perf-dir --work-tree --debugfs-dir' -- "$cur" ) ) + --perf-dir --work-tree --debugfs-dir' -- "$cur" else cmds=$($cmd --list-cmds) - COMPREPLY=( $( compgen -W '$cmds' -- "$cur" ) ) + __perfcomp "$cmds" "$cur" fi # List possible events for -e option elif [[ $prev == "-e" && "${words[1]}" == @(record|stat|top) ]]; then evts=$($cmd list --raw-dump) - COMPREPLY=( $( compgen -W '$evts' -- "$cur" ) ) + __perfcomp "$evts" "$cur" __ltrim_colon_completions $cur # List long option names elif [[ $cur == --* ]]; then subcmd=${words[1]} opts=$($cmd $subcmd --list-opts) - COMPREPLY=( $( compgen -W '$opts' -- "$cur" ) ) + __perfcomp "$opts" "$cur" fi } From 37e72c31061521d6f0e4b7fe47cd5748280ed691 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Sun, 17 Nov 2013 21:43:25 +0530 Subject: [PATCH 101/981] perf completion: Factor out call to __ltrim_colon_completions In our sole callsite, __ltrim_colon_completions is called after __perfcomp, to modify the COMPREPLY set by the invocation. This is problematic, because in the zsh equivalent (using compset/ compadd), we'll have to generate completions in one-shot. So factor out this entire callsite into a special override'able __perfcomp_colon function; we will override it when introducing zsh support. Signed-off-by: Ramkumar Ramachandra Cc: Ingo Molnar Link: http://lkml.kernel.org/r/1384704807-15779-4-git-send-email-artagnon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bash_completion | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/perf/bash_completion b/tools/perf/bash_completion index 824312681601..573599b42be2 100644 --- a/tools/perf/bash_completion +++ b/tools/perf/bash_completion @@ -94,6 +94,12 @@ __perfcomp () COMPREPLY=( $( compgen -W "$1" -- "$2" ) ) } +__perfcomp_colon () +{ + __perfcomp "$1" "$2" + __ltrim_colon_completions $cur +} + __perf_main () { local cmd @@ -114,8 +120,7 @@ __perf_main () # List possible events for -e option elif [[ $prev == "-e" && "${words[1]}" == @(record|stat|top) ]]; then evts=$($cmd list --raw-dump) - __perfcomp "$evts" "$cur" - __ltrim_colon_completions $cur + __perfcomp_colon "$evts" "$cur" # List long option names elif [[ $cur == --* ]]; then subcmd=${words[1]} From f38ab8af794c184c15f5e001d0eaa16f4a120978 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Sun, 17 Nov 2013 21:43:26 +0530 Subject: [PATCH 102/981] perf completion: Introduce zsh support __perfcomp(), __perfcomp_colon(), and _perf() have to be overridden. Inspired by the way the git.git completion system is structured. Signed-off-by: Ramkumar Ramachandra Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1384704807-15779-5-git-send-email-artagnon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bash_completion | 63 +++++++++++++++++++++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) diff --git a/tools/perf/bash_completion b/tools/perf/bash_completion index 573599b42be2..49494882d9bb 100644 --- a/tools/perf/bash_completion +++ b/tools/perf/bash_completion @@ -1,4 +1,4 @@ -# perf completion +# perf bash and zsh completion # Taken from git.git's completion script. __my_reassemble_comp_words_by_ref() @@ -129,6 +129,67 @@ __perf_main () fi } +if [[ -n ${ZSH_VERSION-} ]]; then + autoload -U +X compinit && compinit + + __perfcomp () + { + emulate -L zsh + + local c IFS=$' \t\n' + local -a array + + for c in ${=1}; do + case $c in + --*=*|*.) ;; + *) c="$c " ;; + esac + array[${#array[@]}+1]="$c" + done + + compset -P '*[=:]' + compadd -Q -S '' -a -- array && _ret=0 + } + + __perfcomp_colon () + { + emulate -L zsh + + local cur_="${2-$cur}" + local c IFS=$' \t\n' + local -a array + + if [[ "$cur_" == *:* ]]; then + local colon_word=${cur_%"${cur_##*:}"} + fi + + for c in ${=1}; do + case $c in + --*=*|*.) ;; + *) c="$c " ;; + esac + array[$#array+1]=${c#"$colon_word"} + done + + compset -P '*[=:]' + compadd -Q -S '' -a -- array && _ret=0 + } + + _perf () + { + local _ret=1 cur cword prev + cur=${words[CURRENT]} + prev=${words[CURRENT-1]} + let cword=CURRENT-1 + emulate ksh -c __perf_main + let _ret && _default && _ret=0 + return _ret + } + + compdef _perf perf + return +fi + type perf &>/dev/null && _perf() { From a8b4c7014cadfdacd4e1f4c963128593be6f20de Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Sun, 17 Nov 2013 21:43:27 +0530 Subject: [PATCH 103/981] perf completion: Rename file to reflect zsh support Signed-off-by: Ramkumar Ramachandra Cc: Ingo Molnar Link: http://lkml.kernel.org/r/1384704807-15779-6-git-send-email-artagnon@gmail.com [ Fix 'make install' target ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 4 ++-- tools/perf/{bash_completion => perf-completion.sh} | 0 2 files changed, 2 insertions(+), 2 deletions(-) rename tools/perf/{bash_completion => perf-completion.sh} (100%) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 7fc8f179cae7..e416ccc7d831 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -840,9 +840,9 @@ ifndef NO_LIBPYTHON $(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python'; \ $(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin' endif - $(call QUIET_INSTALL, bash_completion-script) \ + $(call QUIET_INSTALL, perf_completion-script) \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d'; \ - $(INSTALL) bash_completion '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf' + $(INSTALL) perf-completion.sh '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf' $(call QUIET_INSTALL, tests) \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \ $(INSTALL) tests/attr.py '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \ diff --git a/tools/perf/bash_completion b/tools/perf/perf-completion.sh similarity index 100% rename from tools/perf/bash_completion rename to tools/perf/perf-completion.sh From e944d3d7d151eea149c62310eaff7b92c7732f58 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 18 Nov 2013 14:34:52 +0900 Subject: [PATCH 104/981] perf script: Move evname print code to process_event() The print_sample_start() will be reused by other printing routine for internal events like COMM, FORK and EXIT from next patch. And because they're not tied to a specific event, move the evname print code to its caller. Signed-off-by: Namhyung Kim Reviewed-by: David Ahern Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1384752894-10974-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index baf17989a216..b392770766dd 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -288,7 +288,6 @@ static void print_sample_start(struct perf_sample *sample, struct perf_evsel *evsel) { struct perf_event_attr *attr = &evsel->attr; - const char *evname = NULL; unsigned long secs; unsigned long usecs; unsigned long long nsecs; @@ -323,11 +322,6 @@ static void print_sample_start(struct perf_sample *sample, usecs = nsecs / NSECS_PER_USEC; printf("%5lu.%06lu: ", secs, usecs); } - - if (PRINT_FIELD(EVNAME)) { - evname = perf_evsel__name(evsel); - printf("%s: ", evname ? evname : "[unknown]"); - } } static bool is_bts_event(struct perf_event_attr *attr) @@ -434,6 +428,11 @@ static void process_event(union perf_event *event, struct perf_sample *sample, print_sample_start(sample, thread, evsel); + if (PRINT_FIELD(EVNAME)) { + const char *evname = perf_evsel__name(evsel); + printf("%s: ", evname ? evname : "[unknown]"); + } + if (is_bts_event(attr)) { print_sample_bts(event, sample, evsel, machine, thread); return; From 3aa5939d71fa22a947808ba9c798b8537c35097a Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 15 Nov 2013 15:52:29 +0200 Subject: [PATCH 105/981] perf record: Make per-cpu mmaps the default. This affects the -p, -t and -u options that previously defaulted to per-thread mmaps. Consequently add an option to select per-thread mmaps to support the old behaviour. Note that per-thread can be used with a workload-only (i.e. none of -p, -t, -u, -a or -C is selected) to get a per-thread mmap with no inheritance. Signed-off-by: Adrian Hunter Acked-by: Ingo Molnar Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/5286271D.3020808@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 10 +++++----- tools/perf/builtin-record.c | 5 +++-- tools/perf/tests/attr/test-record-no-inherit | 2 +- tools/perf/util/evlist.c | 6 ++++-- tools/perf/util/evsel.c | 5 +++-- tools/perf/util/target.c | 11 ++++++++++- tools/perf/util/target.h | 4 +++- 7 files changed, 29 insertions(+), 14 deletions(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 43b42c4f4a91..6ac867e7667f 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -201,11 +201,11 @@ abort events and some memory events in precise mode on modern Intel CPUs. --transaction:: Record transaction flags for transaction related events. ---force-per-cpu:: -Force the use of per-cpu mmaps. By default, when tasks are specified (i.e. -p, --t or -u options) per-thread mmaps are created. This option overrides that and -forces per-cpu mmaps. A side-effect of that is that inheritance is -automatically enabled. Add the -i option also to disable inheritance. +--per-thread:: +Use per-thread mmaps. By default per-cpu mmaps are created. This option +overrides that and uses per-thread mmaps. A side-effect of that is that +inheritance is automatically disabled. --per-thread is ignored with a warning +if combined with -a or -C options. SEE ALSO -------- diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 7c8020a32784..f5b18b8fe8c9 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -800,6 +800,7 @@ static struct perf_record record = { .freq = 4000, .target = { .uses_mmap = true, + .default_per_cpu = true, }, }, }; @@ -888,8 +889,8 @@ const struct option record_options[] = { "sample by weight (on special events only)"), OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction, "sample transaction flags (special events only)"), - OPT_BOOLEAN(0, "force-per-cpu", &record.opts.target.force_per_cpu, - "force the use of per-cpu mmaps"), + OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread, + "use per-thread mmaps"), OPT_END() }; diff --git a/tools/perf/tests/attr/test-record-no-inherit b/tools/perf/tests/attr/test-record-no-inherit index 9079a25cd643..44edcb2edcd5 100644 --- a/tools/perf/tests/attr/test-record-no-inherit +++ b/tools/perf/tests/attr/test-record-no-inherit @@ -3,5 +3,5 @@ command = record args = -i kill >/dev/null 2>&1 [event:base-record] -sample_type=259 +sample_type=263 inherit=0 diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index bbc746aa5716..76fa76431329 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -819,8 +819,10 @@ int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) if (evlist->threads == NULL) return -1; - if (target->force_per_cpu) - evlist->cpus = cpu_map__new(target->cpu_list); + if (target->default_per_cpu) + evlist->cpus = target->per_thread ? + cpu_map__dummy_new() : + cpu_map__new(target->cpu_list); else if (target__has_task(target)) evlist->cpus = cpu_map__dummy_new(); else if (!target__has_cpu(target) && !target->uses_mmap) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index dad64926170f..b5fe7f9b2e15 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -574,6 +574,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_evsel *leader = evsel->leader; struct perf_event_attr *attr = &evsel->attr; int track = !evsel->idx; /* only the first counter needs these */ + bool per_cpu = opts->target.default_per_cpu && !opts->target.per_thread; attr->sample_id_all = perf_missing_features.sample_id_all ? 0 : 1; attr->inherit = !opts->no_inherit; @@ -647,7 +648,7 @@ void perf_evsel__config(struct perf_evsel *evsel, } } - if (target__has_cpu(&opts->target) || opts->target.force_per_cpu) + if (target__has_cpu(&opts->target)) perf_evsel__set_sample_bit(evsel, CPU); if (opts->period) @@ -655,7 +656,7 @@ void perf_evsel__config(struct perf_evsel *evsel, if (!perf_missing_features.sample_id_all && (opts->sample_time || !opts->no_inherit || - target__has_cpu(&opts->target) || opts->target.force_per_cpu)) + target__has_cpu(&opts->target) || per_cpu)) perf_evsel__set_sample_bit(evsel, TIME); if (opts->raw_samples) { diff --git a/tools/perf/util/target.c b/tools/perf/util/target.c index 3c778a07b7cc..e74c5963dc7a 100644 --- a/tools/perf/util/target.c +++ b/tools/perf/util/target.c @@ -55,6 +55,13 @@ enum target_errno target__validate(struct target *target) ret = TARGET_ERRNO__UID_OVERRIDE_SYSTEM; } + /* THREAD and SYSTEM/CPU are mutually exclusive */ + if (target->per_thread && (target->system_wide || target->cpu_list)) { + target->per_thread = false; + if (ret == TARGET_ERRNO__SUCCESS) + ret = TARGET_ERRNO__SYSTEM_OVERRIDE_THREAD; + } + return ret; } @@ -100,6 +107,7 @@ static const char *target__error_str[] = { "UID switch overriding CPU", "PID/TID switch overriding SYSTEM", "UID switch overriding SYSTEM", + "SYSTEM/CPU switch overriding PER-THREAD", "Invalid User: %s", "Problems obtaining information for user %s", }; @@ -131,7 +139,8 @@ int target__strerror(struct target *target, int errnum, msg = target__error_str[idx]; switch (errnum) { - case TARGET_ERRNO__PID_OVERRIDE_CPU ... TARGET_ERRNO__UID_OVERRIDE_SYSTEM: + case TARGET_ERRNO__PID_OVERRIDE_CPU ... + TARGET_ERRNO__SYSTEM_OVERRIDE_THREAD: snprintf(buf, buflen, "%s", msg); break; diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h index 2d0c50690892..31dd2e9a27d0 100644 --- a/tools/perf/util/target.h +++ b/tools/perf/util/target.h @@ -12,7 +12,8 @@ struct target { uid_t uid; bool system_wide; bool uses_mmap; - bool force_per_cpu; + bool default_per_cpu; + bool per_thread; }; enum target_errno { @@ -33,6 +34,7 @@ enum target_errno { TARGET_ERRNO__UID_OVERRIDE_CPU, TARGET_ERRNO__PID_OVERRIDE_SYSTEM, TARGET_ERRNO__UID_OVERRIDE_SYSTEM, + TARGET_ERRNO__SYSTEM_OVERRIDE_THREAD, /* for target__parse_uid() */ TARGET_ERRNO__INVALID_UID, From 4bc437964ef540462bd15af4a713da62961809aa Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 18 Nov 2013 11:55:55 +0200 Subject: [PATCH 106/981] perf tools: Allow '--inherit' as the negation of '--no-inherit' Long options can be negated by prefixing them with 'no-'. However options that already start with 'no-', such as '--no-inherit' result in ugly double 'no's. Avoid that by accepting that the removal of 'no-' also negates the long option. Suggested-by: Peter Zijlstra Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1384768557-23331-3-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-options.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c index 31f404a032a9..b6b39ff14fbb 100644 --- a/tools/perf/util/parse-options.c +++ b/tools/perf/util/parse-options.c @@ -224,6 +224,24 @@ static int parse_long_opt(struct parse_opt_ctx_t *p, const char *arg, return 0; } if (!rest) { + if (!prefixcmp(options->long_name, "no-")) { + /* + * The long name itself starts with "no-", so + * accept the option without "no-" so that users + * do not have to enter "no-no-" to get the + * negation. + */ + rest = skip_prefix(arg, options->long_name + 3); + if (rest) { + flags |= OPT_UNSET; + goto match; + } + /* Abbreviated case */ + if (!prefixcmp(options->long_name + 3, arg)) { + flags |= OPT_UNSET; + goto is_abbreviated; + } + } /* abbreviated? */ if (!strncmp(options->long_name, arg, arg_end - arg)) { is_abbreviated: @@ -259,6 +277,7 @@ is_abbreviated: if (!rest) continue; } +match: if (*rest) { if (*rest != '=') continue; From 167faf32b07fc47637048fbcbdfcf4a89481686d Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 18 Nov 2013 11:55:56 +0200 Subject: [PATCH 107/981] perf tools: Add option macro OPT_BOOLEAN_SET OPT_BOOLEAN_SET records whether a boolean option was set by the user. That information can be used to change the default value for the option after the options have been parsed. Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1384768557-23331-4-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-options.c | 2 ++ tools/perf/util/parse-options.h | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c index b6b39ff14fbb..d22e3f8017dc 100644 --- a/tools/perf/util/parse-options.c +++ b/tools/perf/util/parse-options.c @@ -78,6 +78,8 @@ static int get_value(struct parse_opt_ctx_t *p, case OPTION_BOOLEAN: *(bool *)opt->value = unset ? false : true; + if (opt->set) + *(bool *)opt->set = true; return 0; case OPTION_INCR: diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h index b0241e28eaf7..cbf0149cf221 100644 --- a/tools/perf/util/parse-options.h +++ b/tools/perf/util/parse-options.h @@ -82,6 +82,9 @@ typedef int parse_opt_cb(const struct option *, const char *arg, int unset); * OPTION_{BIT,SET_UINT,SET_PTR} store the {mask,integer,pointer} to put in * the value when met. * CALLBACKS can use it like they want. + * + * `set`:: + * whether an option was set by the user */ struct option { enum parse_opt_type type; @@ -94,6 +97,7 @@ struct option { int flags; parse_opt_cb *callback; intptr_t defval; + bool *set; }; #define check_vtype(v, type) ( BUILD_BUG_ON_ZERO(!__builtin_types_compatible_p(typeof(v), type)) + v ) @@ -103,6 +107,10 @@ struct option { #define OPT_GROUP(h) { .type = OPTION_GROUP, .help = (h) } #define OPT_BIT(s, l, v, h, b) { .type = OPTION_BIT, .short_name = (s), .long_name = (l), .value = check_vtype(v, int *), .help = (h), .defval = (b) } #define OPT_BOOLEAN(s, l, v, h) { .type = OPTION_BOOLEAN, .short_name = (s), .long_name = (l), .value = check_vtype(v, bool *), .help = (h) } +#define OPT_BOOLEAN_SET(s, l, v, os, h) \ + { .type = OPTION_BOOLEAN, .short_name = (s), .long_name = (l), \ + .value = check_vtype(v, bool *), .help = (h), \ + .set = check_vtype(os, bool *)} #define OPT_INCR(s, l, v, h) { .type = OPTION_INCR, .short_name = (s), .long_name = (l), .value = check_vtype(v, int *), .help = (h) } #define OPT_SET_UINT(s, l, v, h, i) { .type = OPTION_SET_UINT, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned int *), .help = (h), .defval = (i) } #define OPT_SET_PTR(s, l, v, h, p) { .type = OPTION_SET_PTR, .short_name = (s), .long_name = (l), .value = (v), .help = (h), .defval = (p) } From 69e7e5b02bc6a9e5cf4a54911b27ca133cc1f99f Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 18 Nov 2013 11:55:57 +0200 Subject: [PATCH 108/981] perf record: Default -t option to no inheritance The change to per-cpu mmaps causes the -p, -t and -u options now to have inheritance enabled by default. Change that back to no inheritance but for the -t option only. Requested-by: Peter Zijlstra Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1384768557-23331-5-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 2 ++ tools/perf/builtin-record.c | 8 ++++++-- tools/perf/perf.h | 1 + 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 6ac867e7667f..c407897f0435 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -57,6 +57,8 @@ OPTIONS -t:: --tid=:: Record events on existing thread ID (comma separated list). + This option also disables inheritance by default. Enable it by adding + --inherit. -u:: --uid=:: diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index f5b18b8fe8c9..65615a8bc25e 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -843,8 +843,9 @@ const struct option record_options[] = { OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), OPT_STRING('o', "output", &record.file.path, "file", "output file name"), - OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit, - "child tasks do not inherit counters"), + OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, + &record.opts.no_inherit_set, + "child tasks do not inherit counters"), OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"), OPT_CALLBACK('m', "mmap-pages", &record.opts.mmap_pages, "pages", "number of mmap data pages", @@ -939,6 +940,9 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) goto out_symbol_exit; } + if (rec->opts.target.tid && !rec->opts.no_inherit_set) + rec->opts.no_inherit = true; + err = target__validate(&rec->opts.target); if (err) { target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); diff --git a/tools/perf/perf.h b/tools/perf/perf.h index b079304bd53d..b23fed527514 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -254,6 +254,7 @@ struct perf_record_opts { bool inherit_stat; bool no_delay; bool no_inherit; + bool no_inherit_set; bool no_samples; bool raw_samples; bool sample_address; From 0a8eb275cbdb8462854d5f7e1168d86cee4cc9ea Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Fri, 1 Nov 2013 20:25:45 +0400 Subject: [PATCH 109/981] perf timechart: Always try to print at least 15 tasks Always try to print at least 15 tasks no matter how long they run. Signed-off-by: Stanislav Fomichev Acked-by: Namhyung Kim Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1383323151-19810-2-git-send-email-stfomichev@yandex-team.ru Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-timechart.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 41c9bde2fb67..bb21e57ff9bb 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -945,15 +945,17 @@ static void write_svg_file(const char *filename) { u64 i; int count; + int thresh = TIME_THRESH; numcpus++; - count = determine_display_tasks(TIME_THRESH); - - /* We'd like to show at least 15 tasks; be less picky if we have fewer */ - if (count < 15) - count = determine_display_tasks(TIME_THRESH / 10); + /* We'd like to show at least proc_num tasks; + * be less picky if we have fewer */ + do { + count = determine_display_tasks(thresh); + thresh /= 10; + } while (!process_filter && thresh && count < 15); open_svg(filename, numcpus, count, first_time, last_time); From 54874e3236b834064943c02a647823ab5d97be57 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Fri, 1 Nov 2013 20:25:46 +0400 Subject: [PATCH 110/981] perf timechart: Add option to limit number of tasks Add -n option to specify min. number of tasks to print. Signed-off-by: Stanislav Fomichev Acked-by: Namhyung Kim Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1383323151-19810-3-git-send-email-stfomichev@yandex-team.ru Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-timechart.txt | 4 ++++ tools/perf/builtin-timechart.c | 5 ++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf-timechart.txt b/tools/perf/Documentation/perf-timechart.txt index 3ff8bd4f0b4d..4373144a43b4 100644 --- a/tools/perf/Documentation/perf-timechart.txt +++ b/tools/perf/Documentation/perf-timechart.txt @@ -54,6 +54,10 @@ $ perf timechart Written 10.2 seconds of trace to output.svg. +-n:: +--proc-num:: + Print task info for at least given number of tasks. + SEE ALSO -------- linkperf:perf-record[1] diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index bb21e57ff9bb..c352be418f98 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -41,6 +41,7 @@ #define SUPPORT_OLD_POWER_EVENTS 1 #define PWR_EVENT_EXIT -1 +static int proc_num = 15; static unsigned int numcpus; static u64 min_freq; /* Lowest CPU frequency seen */ @@ -955,7 +956,7 @@ static void write_svg_file(const char *filename) do { count = determine_display_tasks(thresh); thresh /= 10; - } while (!process_filter && thresh && count < 15); + } while (!process_filter && thresh && count < proc_num); open_svg(filename, numcpus, count, first_time, last_time); @@ -1102,6 +1103,8 @@ int cmd_timechart(int argc, const char **argv, parse_process), OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", "Look for files with symbols relative to this directory"), + OPT_INTEGER('n', "proc-num", &proc_num, + "min. number of tasks to print"), OPT_END() }; const char * const timechart_usage[] = { From 753c505dc49a87a4421d452bda048e4b93e8e42b Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Fri, 1 Nov 2013 20:25:47 +0400 Subject: [PATCH 111/981] perf timechart: Use proc_num to implement --power-only Don't use special flag to indicate power-only mode, just set proc_num to 0. Signed-off-by: Stanislav Fomichev Acked-by: Namhyung Kim Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1383323151-19810-4-git-send-email-stfomichev@yandex-team.ru Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-timechart.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index c352be418f98..6410c9ead9e2 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -912,7 +912,7 @@ static int determine_display_tasks(u64 threshold) /* no exit marker, task kept running to the end */ if (p->end_time == 0) p->end_time = last_time; - if (p->total_time >= threshold && !power_only) + if (p->total_time >= threshold) p->display = 1; c = p->all; @@ -923,7 +923,7 @@ static int determine_display_tasks(u64 threshold) if (c->start_time == 1) c->start_time = first_time; - if (c->total_time >= threshold && !power_only) { + if (c->total_time >= threshold) { c->display = 1; count++; } @@ -950,6 +950,8 @@ static void write_svg_file(const char *filename) numcpus++; + if (power_only) + proc_num = 0; /* We'd like to show at least proc_num tasks; * be less picky if we have fewer */ @@ -967,9 +969,11 @@ static void write_svg_file(const char *filename) svg_cpu_box(i, max_freq, turbo_frequency); draw_cpu_usage(); - draw_process_bars(); + if (proc_num) + draw_process_bars(); draw_c_p_states(); - draw_wakeups(); + if (proc_num) + draw_wakeups(); svg_close(); } From c87097d39dae1c42a5068e00dd3b76a4162ee0fc Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Fri, 1 Nov 2013 20:25:48 +0400 Subject: [PATCH 112/981] perf timechart: Add support for displaying only tasks related data In order to make SVG smaller and faster to browse add possibility to switch off power related information with -T switch. Signed-off-by: Stanislav Fomichev Acked-by: Namhyung Kim Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1383323151-19810-5-git-send-email-stfomichev@yandex-team.ru Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-timechart.txt | 3 +++ tools/perf/builtin-timechart.c | 11 ++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf-timechart.txt b/tools/perf/Documentation/perf-timechart.txt index 4373144a43b4..d8441894d9ec 100644 --- a/tools/perf/Documentation/perf-timechart.txt +++ b/tools/perf/Documentation/perf-timechart.txt @@ -35,6 +35,9 @@ OPTIONS -P:: --power-only:: Only output the CPU power section of the diagram +-T:: +--tasks-only:: + Don't output processor state transitions -p:: --process:: Select the processes to display, by name or PID diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 6410c9ead9e2..b3f175a30d94 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -51,6 +51,7 @@ static u64 turbo_frequency; static u64 first_time, last_time; static bool power_only; +static bool tasks_only; struct per_pid; @@ -971,7 +972,8 @@ static void write_svg_file(const char *filename) draw_cpu_usage(); if (proc_num) draw_process_bars(); - draw_c_p_states(); + if (!tasks_only) + draw_c_p_states(); if (proc_num) draw_wakeups(); @@ -1102,6 +1104,8 @@ int cmd_timechart(int argc, const char **argv, OPT_STRING('o', "output", &output_name, "file", "output file name"), OPT_INTEGER('w', "width", &svg_page_width, "page width"), OPT_BOOLEAN('P', "power-only", &power_only, "output power data only"), + OPT_BOOLEAN('T', "tasks-only", &tasks_only, + "output processes data only"), OPT_CALLBACK('p', "process", NULL, "process", "process selector. Pass a pid or process name.", parse_process), @@ -1119,6 +1123,11 @@ int cmd_timechart(int argc, const char **argv, argc = parse_options(argc, argv, options, timechart_usage, PARSE_OPT_STOP_AT_NON_OPTION); + if (power_only && tasks_only) { + pr_err("-P and -T options cannot be used at the same time.\n"); + return -1; + } + symbol__init(); if (argc && !strncmp(argv[0], "rec", 3)) From cbb2e81e5232b1bca5cd2aa1d7a7eb1cd30f8304 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Fri, 1 Nov 2013 20:25:49 +0400 Subject: [PATCH 113/981] perf timechart: Group figures and add title with details Add titles to figures so we can run SVG interactively in Firefox and check event details in the tooltips. This also aids exploring SVG with Inkscape because when user clicks on one part of logical figure, all parts are selected. It's also possible to read titles with Inkscape in the object details. Signed-off-by: Stanislav Fomichev Acked-by: Namhyung Kim Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1383323151-19810-6-git-send-email-stfomichev@yandex-team.ru Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-timechart.c | 6 ++-- tools/perf/util/svghelper.c | 56 ++++++++++++++++++++++++++++++++-- tools/perf/util/svghelper.h | 5 +-- 3 files changed, 60 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index b3f175a30d94..6a848b8c4c16 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -798,11 +798,11 @@ static void draw_process_bars(void) sample = c->samples; while (sample) { if (sample->type == TYPE_RUNNING) - svg_sample(Y, sample->cpu, sample->start_time, sample->end_time); + svg_running(Y, sample->cpu, sample->start_time, sample->end_time); if (sample->type == TYPE_BLOCKED) - svg_box(Y, sample->start_time, sample->end_time, "blocked"); + svg_blocked(Y, sample->cpu, sample->start_time, sample->end_time); if (sample->type == TYPE_WAITING) - svg_waiting(Y, sample->start_time, sample->end_time); + svg_waiting(Y, sample->cpu, sample->start_time, sample->end_time); sample = sample->next; } diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c index 96c866045d60..9a5b41392e01 100644 --- a/tools/perf/util/svghelper.c +++ b/tools/perf/util/svghelper.c @@ -95,6 +95,7 @@ void open_svg(const char *filename, int cpus, int rows, u64 start, u64 end) total_height = (1 + rows + cpu2slot(cpus)) * SLOT_MULT; fprintf(svgfile, " \n"); + fprintf(svgfile, "\n"); fprintf(svgfile, "\n", svg_page_width, total_height); fprintf(svgfile, "\n