diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index fb41a187647..ce655f7b06e 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt @@ -40,6 +40,7 @@ Features: - soft limit - moving(recharging) account at moving a task is selectable. - usage threshold notifier + - memory pressure notifier - oom-killer disable knob and oom-notifier - Root cgroup has no limit controls. @@ -65,6 +66,7 @@ Brief summary of control files. memory.stat # show various statistics memory.use_hierarchy # set/show hierarchical account enabled memory.force_empty # trigger forced move charge to parent + memory.pressure_level # set memory pressure notifications memory.swappiness # set/show swappiness parameter of vmscan (See sysctl's vm.swappiness) memory.move_charge_at_immigrate # set/show controls of moving charges @@ -700,7 +702,73 @@ At reading, current status of OOM is shown. under_oom 0 or 1 (if 1, the memory cgroup is under OOM, tasks may be stopped.) -11. TODO +11. Memory Pressure + +The pressure level notifications can be used to monitor the memory +allocation cost; based on the pressure, applications can implement +different strategies of managing their memory resources. The pressure +levels are defined as follows: + +The "low" level means that the system is reclaiming memory for new +allocations. Monitoring this reclaiming activity might be useful for +maintaining the cache level. Upon notification, the program (typically +"Activity Manager") might analyze vmstat and act in advance (e.g. +prematurely shut down unimportant services). + +The "medium" level means that the system is experiencing medium memory +pressure; the system might be swapping, paging out active file caches, +etc. Upon this event applications may decide to further analyze +vmstat/zoneinfo/memcg or internal memory usage statistics and free any +resources that can be easily reconstructed or re-read from disk. + +The "critical" level means that the system is actively thrashing, it is +about to run out of memory (OOM), or the in-kernel OOM killer is on its +way to trigger. Applications should do whatever they can to help the +system. It might be too late to consult with vmstat or any other +statistics, so it's advisable to take immediate action. + +The events are propagated upward until the event is handled, i.e. the +events are not pass-through. Here is what this means: suppose you have +three cgroups: A->B->C. Now you set up an event listener on cgroups A, B +and C, and suppose group C experiences some pressure. In this situation, +only group C will receive the notification, i.e. groups A and B will not +receive it. This is done to avoid excessive "broadcasting" of messages, +which disturbs the system and which is especially bad if we are low on +memory or thrashing. So, organize the cgroups wisely, or propagate the +events manually (or, ask us to implement the pass-through events, +explaining why you would need them.) + +The file memory.pressure_level is only used to set up an eventfd. To +register a notification, an application must: + +- create an eventfd using eventfd(2); +- open memory.pressure_level; +- write a string like "<event_fd> <fd of memory.pressure_level> <level>" + to cgroup.event_control. + +The application will be notified through the eventfd when memory pressure +is at the specified level (or higher). Read/write operations to +memory.pressure_level are not implemented.
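A minimal userspace sketch of the registration sequence above (illustrative only, not part of this patch; it assumes the memory cgroup is mounted at /sys/fs/cgroup/memory with a group named "foo", and trims error reporting):

  #include <stdio.h>
  #include <string.h>
  #include <stdint.h>
  #include <unistd.h>
  #include <fcntl.h>
  #include <sys/eventfd.h>

  int main(void)
  {
          char line[64];
          uint64_t count;
          int efd = eventfd(0, 0);
          int pfd = open("/sys/fs/cgroup/memory/foo/memory.pressure_level",
                         O_RDONLY);
          int cfd = open("/sys/fs/cgroup/memory/foo/cgroup.event_control",
                         O_WRONLY);

          if (efd < 0 || pfd < 0 || cfd < 0)
                  return 1;

          /* "<event_fd> <pressure_level_fd> <level>" registers the listener */
          snprintf(line, sizeof(line), "%d %d low", efd, pfd);
          if (write(cfd, line, strlen(line)) < 0)
                  return 1;

          /* blocks until the kernel signals "low" (or higher) pressure */
          if (read(efd, &count, sizeof(count)) == sizeof(count))
                  printf("memory pressure event, count=%llu\n",
                         (unsigned long long)count);
          return 0;
  }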
+ +Test: + + Here is a small script example that makes a new cgroup, sets up a + memory limit, sets up a notification in the cgroup, and then makes the + child cgroup experience critical pressure: + + # cd /sys/fs/cgroup/memory/ + # mkdir foo + # cd foo + # cgroup_event_listener memory.pressure_level low & + # echo 8000000 > memory.limit_in_bytes + # echo 8000000 > memory.memsw.limit_in_bytes + # echo $$ > tasks + # dd if=/dev/zero | read x + + (Expect a bunch of notifications, and eventually, the oom-killer will + trigger.) + +12. TODO 1. Add support for accounting huge pages (as a separate controller) 2. Make per-cgroup scanner reclaim not-shared pages first diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 60252dee8a6..4b519c891b1 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -438,6 +438,7 @@ and a page is modified, the file page is replaced by a private anonymous copy. "Swap" shows how much would-be-anonymous memory is also used, but out on swap. "SwapPss" shows proportional swap share of this mapping. + The "Name" field will only be present on a mapping that has been named by userspace, and will show the name passed in by userspace. @@ -456,6 +457,10 @@ To clear the bits for the file mapped pages associated with the process > echo 3 > /proc/PID/clear_refs Any other value written to /proc/PID/clear_refs will have no effect. +To reset the peak resident set size ("high water mark") to the process's +current value: + > echo 5 > /proc/PID/clear_refs + The /proc/pid/pagemap gives the PFN, which can be used to find the pageflags using /proc/kpageflags and number of times a page is mapped using /proc/kpagecount. For detailed explanation, see Documentation/vm/pagemap.txt. diff --git a/Documentation/input/event-codes.txt b/Documentation/input/event-codes.txt index 53305bd0818..f0f0e072ae1 100644 --- a/Documentation/input/event-codes.txt +++ b/Documentation/input/event-codes.txt @@ -91,6 +91,15 @@ sent in the evdev event stream. event and query the device (using EVIOCG* ioctls) to obtain its current state. +* SYN_TIME_SEC, SYN_TIME_NSEC: + - Used to convey the hardware timestamp associated with the current + event packet. The use of these event codes by hardware drivers + is optional. If used, the hardware driver should send the timestamp + ahead of any other events associated with this packet. The timestamp + should be adjusted to the CLOCK_MONOTONIC base. + This is useful for drivers of hardware that handles batching + without involving the main CPU. + EV_KEY: ---------- EV_KEY events take the form KEY_<name> or BTN_<name>. For example, KEY_A is used diff --git a/Documentation/input/multi-touch-protocol.txt b/Documentation/input/multi-touch-protocol.txt index 543101c5bf2..71af317669e 100644 --- a/Documentation/input/multi-touch-protocol.txt +++ b/Documentation/input/multi-touch-protocol.txt @@ -80,6 +80,10 @@ Userspace can detect that a driver can report more total contacts than slots by noting that the largest supported BTN_TOOL_*TAP event is larger than the total number of type B slots reported in the absinfo for the ABS_MT_SLOT axis. +Velocity tracking and temporal precision can be improved if the device provides +exact timestamps for touches, reported through SYN_TIME_SEC and SYN_TIME_NSEC. +The timestamps should be reported ahead of everything else in the packet.
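For illustration only (not part of this patch): a sketch of how a driver might report such a timestamp ahead of the MT data for one packet. The report_touch() helper and its parameters are hypothetical; it assumes the SYN_TIME_SEC/SYN_TIME_NSEC codes added above and a timestamp already captured on the CLOCK_MONOTONIC base:

  #include <linux/input.h>
  #include <linux/input/mt.h>
  #include <linux/time.h>

  /* Hypothetical helper: emit the hardware timestamp first, then the
   * slot data, then close the packet with SYN_REPORT via input_sync(). */
  static void report_touch(struct input_dev *dev, const struct timespec *ts,
                           int slot, int id, int x, int y)
  {
          input_event(dev, EV_SYN, SYN_TIME_SEC, ts->tv_sec);
          input_event(dev, EV_SYN, SYN_TIME_NSEC, ts->tv_nsec);

          input_mt_slot(dev, slot);
          input_report_abs(dev, ABS_MT_TRACKING_ID, id);
          input_report_abs(dev, ABS_MT_POSITION_X, x);
          input_report_abs(dev, ABS_MT_POSITION_Y, y);
          input_sync(dev);
  }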
+ + Protocol Example A ------------------ diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index b524935539b..7b6fa398374 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2407,6 +2407,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. resume= [SWSUSP] Specify the partition device for software suspend + Format: + {/dev/<dev> | PARTUUID=<uuid> | <int>:<int> | <hex>} resume_offset= [SWSUSP] Specify the offset from the beginning of the partition diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index aef2354436e..34a2ee671ef 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1216,6 +1216,13 @@ router_solicitations - INTEGER routers are present. Default: 3 +use_oif_addrs_only - BOOLEAN + When enabled, the candidate source addresses for destinations + routed via this interface are restricted to the set of addresses + configured on this interface (viz. RFC 6724, section 4). + + Default: false + use_tempaddr - INTEGER Preference for Privacy Extensions (RFC3041). <= 0 : disable Privacy Extensions @@ -1291,6 +1298,19 @@ force_tllao - BOOLEAN race condition where the sender deletes the cached link-layer address prior to receiving a response to a previous solicitation." +optimistic_dad - BOOLEAN + Whether to perform Optimistic Duplicate Address Detection (RFC 4429). + 0: disabled (default) + 1: enabled + +use_optimistic - BOOLEAN + If enabled, do not classify optimistic addresses as deprecated during + source address selection. Preferred addresses will still be chosen + before optimistic addresses, subject to other ranking in the source + address selection algorithm. + 0: disabled (default) + 1: enabled + icmp/*: ratelimit - INTEGER Limit the maximal rates for sending ICMPv6 packets. diff --git a/Documentation/power/suspend-and-cpuhotplug.txt b/Documentation/power/suspend-and-cpuhotplug.txt index f28f9a6f034..e13dafc8e8f 100644 --- a/Documentation/power/suspend-and-cpuhotplug.txt +++ b/Documentation/power/suspend-and-cpuhotplug.txt @@ -29,7 +29,7 @@ More details follow: Write 'mem' to /sys/power/state - syfs file + sysfs file | v Acquire pm_mutex lock diff --git a/Documentation/sync.txt b/Documentation/sync.txt new file mode 100644 index 00000000000..a2d05e7fa19 --- /dev/null +++ b/Documentation/sync.txt @@ -0,0 +1,75 @@ +Motivation: + +In complicated DMA pipelines such as graphics (multimedia, camera, gpu, display), +a consumer of a buffer needs to know when the producer has finished producing +it. Likewise, the producer needs to know when the consumer is finished with the +buffer so it can reuse it. A particular buffer may be consumed by multiple +consumers, which will retain the buffer for different amounts of time. In +addition, a consumer may consume multiple buffers atomically. +The sync framework adds an API which allows synchronization between the +producers and consumers in a generic way while also allowing platforms which +have shared hardware synchronization primitives to exploit them. + +Goals: + * provide a generic API for expressing synchronization dependencies + * allow drivers to exploit hardware synchronization between hardware + blocks + * provide a userspace API that allows a compositor to manage + dependencies. + * provide rich telemetry data to allow debugging slowdowns and stalls of + the graphics pipeline.
+ +Objects: + * sync_timeline + * sync_pt + * sync_fence + +sync_timeline: + +A sync_timeline is an abstract monotonically increasing counter. In general, +each driver/hardware block context will have one of these. They can be backed +by the appropriate hardware or rely on the generic sw_sync implementation. +Timelines are only ever created through their specific implementations +(e.g. sw_sync). + +sync_pt: + +A sync_pt is an abstract value which marks a point on a sync_timeline. Sync_pts +have a single timeline parent. They have 3 states: active, signaled, and error. +They start in active state and transition, once, to either signaled (when the +timeline counter advances beyond the sync_pt’s value) or error state. + +sync_fence: + +Sync_fences are the primary primitives used by drivers to coordinate +synchronization of their buffers. They are a collection of sync_pts which may +or may not have the same timeline parent. A sync_pt can only exist in one fence +and the fence's list of sync_pts is immutable once created. Fences can be +waited on synchronously or asynchronously. Two fences can also be merged to +create a third fence containing a copy of the two fences’ sync_pts. Fences are +backed by file descriptors to allow userspace to coordinate the display pipeline +dependencies. + +Use: + +A driver implementing sync support should have a work submission function which: + * takes a fence argument specifying when to begin work + * asynchronously queues that work to kick off when the fence is signaled + * returns a fence to indicate when its work will be done. + * signals the returned fence once the work is completed. + +Consider an imaginary display driver that has the following API: +/* + * assumes buf is ready to be displayed. + * blocks until the buffer is on screen. + */ + void display_buffer(struct dma_buf *buf); + +The new API will become: +/* + * will display buf when fence is signaled. + * returns immediately with a fence that will signal when buf + * is no longer displayed. + */ +struct sync_fence* display_buffer(struct dma_buf *buf, + struct sync_fence *fence); diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index 6f51fed45f2..a5c45d7b0b9 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -1458,6 +1458,35 @@ will produce: 1) 1.449 us | } +You can disable the hierarchical function call formatting and instead print a +flat list of function entry and return events. This uses the format described +in the Output Formatting section and respects all the trace options that +control that formatting. Hierarchical formatting is the default. + + hierarchical: echo nofuncgraph-flat > trace_options + flat: echo funcgraph-flat > trace_options + + i.e.: + + # tracer: function_graph + # + # entries-in-buffer/entries-written: 68355/68355 #P:2 + # + # _-----=> irqs-off + # / _----=> need-resched + # | / _---=> hardirq/softirq + # || / _--=> preempt-depth + # ||| / delay + # TASK-PID CPU# |||| TIMESTAMP FUNCTION + # | | | |||| | | + sh-1806 [001] d... 198.843443: graph_ent: func=_raw_spin_lock + sh-1806 [001] d... 198.843445: graph_ent: func=__raw_spin_lock + sh-1806 [001] d..1 198.843447: graph_ret: func=__raw_spin_lock + sh-1806 [001] d..1 198.843449: graph_ret: func=_raw_spin_lock + sh-1806 [001] d..1 198.843451: graph_ent: func=_raw_spin_unlock_irqrestore + sh-1806 [001] d...
198.843453: graph_ret: func=_raw_spin_unlock_irqrestore + + You might find other useful features for this tracer in the following "dynamic ftrace" section, such as tracing only specific functions or tasks. diff --git a/android/configs/README b/android/configs/README new file mode 100644 index 00000000000..8798731f890 --- /dev/null +++ b/android/configs/README @@ -0,0 +1,15 @@ +The files in this directory are meant to be used as a base for an Android +kernel config. All devices should have the options in android-base.cfg enabled. +While not mandatory, the options in android-recommended.cfg enable advanced +Android features. + +Assuming you already have a minimalist defconfig for your device, a possible +way to enable these options would be: + + ARCH=<arch> scripts/kconfig/merge_config.sh <...>/<device>_defconfig android/configs/android-base.cfg android/configs/android-recommended.cfg + +This will generate a .config that can then be used to save a new defconfig or +compile a new kernel with Android features enabled. + +Because there is no tool to consistently generate these config fragments, +let's keep them alphabetically sorted rather than randomly ordered. diff --git a/android/configs/android-base.cfg b/android/configs/android-base.cfg new file mode 100644 index 00000000000..532fb1a056d --- /dev/null +++ b/android/configs/android-base.cfg @@ -0,0 +1,150 @@ +# KEEP ALPHABETICALLY SORTED +# CONFIG_DEVKMEM is not set +# CONFIG_DEVMEM is not set +# CONFIG_INET_LRO is not set +# CONFIG_MODULES is not set +# CONFIG_OABI_COMPAT is not set +# CONFIG_SYSVIPC is not set +CONFIG_ANDROID=y +CONFIG_ANDROID_BINDER_IPC=y +CONFIG_ANDROID_LOW_MEMORY_KILLER=y +CONFIG_ASHMEM=y +CONFIG_AUDIT=y +CONFIG_BLK_DEV_DM=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_CGROUPS=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_DEBUG=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_SCHED=y +CONFIG_DM_CRYPT=y +CONFIG_DM_VERITY=y +CONFIG_EMBEDDED=y +CONFIG_EXPERIMENTAL=y +CONFIG_FB=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y +CONFIG_INET6_IPCOMP=y +CONFIG_INET=y +CONFIG_INET_ESP=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_IP6_NF_RAW=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_TARGET_REJECT_SKERR=y +CONFIG_IPV6_MIP6=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_IPV6_PRIVACY=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_NF_ARPFILTER=y +CONFIG_IP_NF_ARPTABLES=y +CONFIG_IP_NF_ARP_MANGLE=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_MANGLE=y +CONFIG_IP_NF_MATCH_AH=y +CONFIG_IP_NF_MATCH_ECN=y +CONFIG_IP_NF_MATCH_TTL=y +CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_SECURITY=y +CONFIG_IP_NF_TARGET_MASQUERADE=y +CONFIG_IP_NF_TARGET_NETMAP=y +CONFIG_IP_NF_TARGET_REDIRECT=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_TARGET_REJECT_SKERR=y +CONFIG_NET=y +CONFIG_NETDEVICES=y +CONFIG_NETFILTER=y +CONFIG_NETFILTER_TPROXY=y +CONFIG_NETFILTER_XT_MATCH_COMMENT=y +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y +CONFIG_NETFILTER_XT_MATCH_CONNMARK=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y +CONFIG_NETFILTER_XT_MATCH_HELPER=y +CONFIG_NETFILTER_XT_MATCH_IPRANGE=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MATCH_LIMIT=y +CONFIG_NETFILTER_XT_MATCH_MAC=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_QTAGUID=y +CONFIG_NETFILTER_XT_MATCH_QUOTA2=y +CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG=y
+CONFIG_NETFILTER_XT_MATCH_QUOTA=y +CONFIG_NETFILTER_XT_MATCH_SOCKET=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETFILTER_XT_MATCH_STRING=y +CONFIG_NETFILTER_XT_MATCH_TIME=y +CONFIG_NETFILTER_XT_MATCH_U32=y +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y +CONFIG_NETFILTER_XT_TARGET_CONNMARK=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +CONFIG_NETFILTER_XT_TARGET_TPROXY=y +CONFIG_NETFILTER_XT_TARGET_TRACE=y +CONFIG_NET_CLS_ACT=y +CONFIG_NET_CLS_U32=y +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_U32=y +CONFIG_NET_KEY=y +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_HTB=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_AMANDA=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_H323=y +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_NF_CONNTRACK_IPV6=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_NETBIOS_NS=y +CONFIG_NF_CONNTRACK_PPTP=y +CONFIG_NF_CONNTRACK_SANE=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_TFTP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NF_CT_PROTO_DCCP=y +CONFIG_NF_CT_PROTO_SCTP=y +CONFIG_NF_CT_PROTO_UDPLITE=y +CONFIG_NF_NAT=y +CONFIG_NO_HZ=y +CONFIG_PACKET=y +CONFIG_PM_AUTOSLEEP=y +CONFIG_PM_WAKELOCKS=y +CONFIG_PPP=y +CONFIG_PPPOLAC=y +CONFIG_PPPOPNS=y +CONFIG_PPP_BSDCOMP=y +CONFIG_PPP_DEFLATE=y +CONFIG_PPP_MPPE=y +CONFIG_PREEMPT=y +CONFIG_RESOURCE_COUNTERS=y +CONFIG_RTC_CLASS=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_PERF_EVENTS_RESTRICT=y +CONFIG_SECURITY_SELINUX=y +CONFIG_STAGING=y +CONFIG_SWITCH=y +CONFIG_SYNC=y +CONFIG_TUN=y +CONFIG_UNIX=y +CONFIG_USB_GADGET=y +CONFIG_USB_G_ANDROID=y +CONFIG_USB_OTG_WAKELOCK=y +CONFIG_XFRM_USER=y diff --git a/android/configs/android-recommended.cfg b/android/configs/android-recommended.cfg new file mode 100644 index 00000000000..93e8b18a6c0 --- /dev/null +++ b/android/configs/android-recommended.cfg @@ -0,0 +1,119 @@ +# KEEP ALPHABETICALLY SORTED +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_LEGACY_PTYS is not set +# CONFIG_NF_CONNTRACK_SIP is not set +# CONFIG_PM_WAKELOCKS_GC is not set +# CONFIG_VT is not set +CONFIG_ANDROID_RAM_CONSOLE=y +CONFIG_ANDROID_TIMED_GPIO=y +CONFIG_BACKLIGHT_LCD_SUPPORT=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_COMPACTION=y +CONFIG_DM_UEVENT=y +CONFIG_DRAGONRISE_FF=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_FUSE_FS=y +CONFIG_GREENASIA_FF=y +CONFIG_HIDRAW=y +CONFIG_HID_A4TECH=y +CONFIG_HID_ACRUX=y +CONFIG_HID_ACRUX_FF=y +CONFIG_HID_APPLE=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CHICONY=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_DRAGONRISE=y +CONFIG_HID_ELECOM=y +CONFIG_HID_EMS_FF=y +CONFIG_HID_EZKEY=y +CONFIG_HID_GREENASIA=y +CONFIG_HID_GYRATION=y +CONFIG_HID_HOLTEK=y +CONFIG_HID_KENSINGTON=y +CONFIG_HID_KEYTOUCH=y +CONFIG_HID_KYE=y +CONFIG_HID_LCPOWER=y +CONFIG_HID_LOGITECH=y +CONFIG_HID_LOGITECH_DJ=y +CONFIG_HID_MAGICMOUSE=y +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_MULTITOUCH=y +CONFIG_HID_NTRIG=y +CONFIG_HID_ORTEK=y +CONFIG_HID_PANTHERLORD=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_PICOLCD=y +CONFIG_HID_PRIMAX=y +CONFIG_HID_PRODIKEYS=y +CONFIG_HID_ROCCAT=y +CONFIG_HID_SAITEK=y +CONFIG_HID_SAMSUNG=y +CONFIG_HID_SMARTJOYPLUS=y +CONFIG_HID_SONY=y +CONFIG_HID_SPEEDLINK=y +CONFIG_HID_SUNPLUS=y 
+CONFIG_HID_THRUSTMASTER=y +CONFIG_HID_TIVO=y +CONFIG_HID_TOPSEED=y +CONFIG_HID_TWINHAN=y +CONFIG_HID_UCLOGIC=y +CONFIG_HID_WACOM=y +CONFIG_HID_WALTOP=y +CONFIG_HID_WIIMOTE=y +CONFIG_HID_ZEROPLUS=y +CONFIG_HID_ZYDACRON=y +CONFIG_INPUT_EVDEV=y +CONFIG_INPUT_GPIO=y +CONFIG_INPUT_JOYSTICK=y +CONFIG_INPUT_KEYCHORD=y +CONFIG_INPUT_KEYRESET=y +CONFIG_INPUT_MISC=y +CONFIG_INPUT_TABLET=y +CONFIG_INPUT_UINPUT=y +CONFIG_ION=y +CONFIG_JOYSTICK_XPAD=y +CONFIG_JOYSTICK_XPAD_FF=y +CONFIG_JOYSTICK_XPAD_LEDS=y +CONFIG_KALLSYMS_ALL=y +CONFIG_KSM=y +CONFIG_LOGIG940_FF=y +CONFIG_LOGIRUMBLEPAD2_FF=y +CONFIG_LOGITECH_FF=y +CONFIG_MD=y +CONFIG_MEDIA_SUPPORT=y +CONFIG_MSDOS_FS=y +CONFIG_PANIC_TIMEOUT=5 +CONFIG_PANTHERLORD_FF=y +CONFIG_PERF_EVENTS=y +CONFIG_PM_DEBUG=y +CONFIG_PM_RUNTIME=y +CONFIG_PM_WAKELOCKS_LIMIT=0 +CONFIG_POWER_SUPPLY=y +CONFIG_SCHEDSTATS=y +CONFIG_SMARTJOYPLUS_FF=y +CONFIG_SND=y +CONFIG_SND_USB_AUDIO=y +CONFIG_SOUND=y +CONFIG_SUSPEND_TIME=y +CONFIG_TABLET_USB_ACECAD=y +CONFIG_TABLET_USB_AIPTEK=y +CONFIG_TABLET_USB_GTCO=y +CONFIG_TABLET_USB_HANWANG=y +CONFIG_TABLET_USB_KBTAB=y +CONFIG_TABLET_USB_WACOM=y +CONFIG_TIMER_STATS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_UHID=y +CONFIG_UID_STAT=y +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_HIDDEV=y +CONFIG_USB_USBNET=y +CONFIG_VFAT_FS=y diff --git a/arch/arm/common/fiq_debugger.c b/arch/arm/common/fiq_debugger.c index 1908ba95a90..d06863881f0 100644 --- a/arch/arm/common/fiq_debugger.c +++ b/arch/arm/common/fiq_debugger.c @@ -81,7 +81,12 @@ struct fiq_debugger_state { atomic_t unhandled_fiq_count; bool in_fiq; + struct work_struct work; + spinlock_t work_lock; + char work_cmd[DEBUG_MAX]; + #ifdef CONFIG_FIQ_DEBUGGER_CONSOLE + spinlock_t console_lock; struct console console; struct tty_struct *tty; int tty_open_count; @@ -557,6 +562,53 @@ static void do_kgdb(struct fiq_debugger_state *state) } #endif +static void debug_schedule_work(struct fiq_debugger_state *state, char *cmd) +{ + unsigned long flags; + + spin_lock_irqsave(&state->work_lock, flags); + if (state->work_cmd[0] != '\0') { + debug_printf(state, "work command processor busy\n"); + spin_unlock_irqrestore(&state->work_lock, flags); + return; + } + + strlcpy(state->work_cmd, cmd, sizeof(state->work_cmd)); + spin_unlock_irqrestore(&state->work_lock, flags); + + schedule_work(&state->work); +} + +static void debug_work(struct work_struct *work) +{ + struct fiq_debugger_state *state; + char work_cmd[DEBUG_MAX]; + char *cmd; + unsigned long flags; + + state = container_of(work, struct fiq_debugger_state, work); + + spin_lock_irqsave(&state->work_lock, flags); + + strlcpy(work_cmd, state->work_cmd, sizeof(work_cmd)); + state->work_cmd[0] = '\0'; + + spin_unlock_irqrestore(&state->work_lock, flags); + + cmd = work_cmd; + if (!strncmp(cmd, "reboot", 6)) { + cmd += 6; + while (*cmd == ' ') + cmd++; + if (*cmd != '\0') + kernel_restart(cmd); + else + kernel_restart(NULL); + } else { + debug_printf(state, "unknown work command '%s'\n", work_cmd); + } +} + /* This function CANNOT be called in FIQ context */ static void debug_irq_exec(struct fiq_debugger_state *state, char *cmd) { @@ -570,6 +622,8 @@ static void debug_irq_exec(struct fiq_debugger_state *state, char *cmd) if (!strcmp(cmd, "kgdb")) do_kgdb(state); #endif + if (!strncmp(cmd, "reboot", 6)) + debug_schedule_work(state, cmd); } static void debug_help(struct fiq_debugger_state *state) { @@ -579,7 +633,8 @@ " regs Register dump\n" " allregs Extended
Register dump\n" " bt Stack trace\n" - " reboot Reboot\n" + " reboot [<c>] Reboot with command <c>\n" + " reset [<c>] Hard reset with command <c>\n" " irqs Interupt status\n" " kmsg Kernel log\n" " version Kernel version\n"); @@ -630,16 +685,16 @@ static bool debug_fiq_exec(struct fiq_debugger_state *state, dump_allregs(state, regs); } else if (!strcmp(cmd, "bt")) { dump_stacktrace(state, (struct pt_regs *)regs, 100, svc_sp); - } else if (!strncmp(cmd, "reboot", 6)) { - cmd += 6; + } else if (!strncmp(cmd, "reset", 5)) { + cmd += 5; while (*cmd == ' ') cmd++; if (*cmd) { char tmp_cmd[32]; strlcpy(tmp_cmd, cmd, sizeof(tmp_cmd)); - kernel_restart(tmp_cmd); + machine_restart(tmp_cmd); } else { - kernel_restart(NULL); + machine_restart(NULL); } } else if (!strcmp(cmd, "irqs")) { dump_irqs(state); @@ -654,8 +709,9 @@ static bool debug_fiq_exec(struct fiq_debugger_state *state, state->no_sleep = true; debug_printf(state, "disabling sleep\n"); } else if (!strcmp(cmd, "console")) { - state->console_enable = true; debug_printf(state, "console mode\n"); + debug_uart_flush(state); + state->console_enable = true; } else if (!strcmp(cmd, "cpu")) { debug_printf(state, "cpu %d\n", state->current_cpu); } else if (!strncmp(cmd, "cpu ", 4)) { @@ -842,7 +898,8 @@ static bool debug_handle_uart_interrupt(struct fiq_debugger_state *state, } last_c = c; } - debug_uart_flush(state); + if (!state->console_enable) + debug_uart_flush(state); if (state->pdata->fiq_ack) state->pdata->fiq_ack(state->pdev, state->fiq); @@ -926,6 +983,7 @@ static void debug_console_write(struct console *co, const char *s, unsigned int count) { struct fiq_debugger_state *state; + unsigned long flags; state = container_of(co, struct fiq_debugger_state, console); @@ -933,12 +991,14 @@ static void debug_console_write(struct console *co, return; debug_uart_enable(state); + spin_lock_irqsave(&state->console_lock, flags); while (count--) { if (*s == '\n') debug_putc(state, '\r'); debug_putc(state, *s++); } debug_uart_flush(state); + spin_unlock_irqrestore(&state->console_lock, flags); debug_uart_disable(state); } @@ -979,8 +1039,10 @@ int fiq_tty_write(struct tty_struct *tty, const unsigned char *buf, int count) return count; debug_uart_enable(state); + spin_lock_irq(&state->console_lock); for (i = 0; i < count; i++) debug_putc(state, *buf++); + spin_unlock_irq(&state->console_lock); debug_uart_disable(state); return count; @@ -988,7 +1050,7 @@ int fiq_tty_write(struct tty_struct *tty, const unsigned char *buf, int count) int fiq_tty_write_room(struct tty_struct *tty) { - return 1024; + return 16; } #ifdef CONFIG_CONSOLE_POLL @@ -1189,6 +1251,9 @@ static int fiq_debugger_probe(struct platform_device *pdev) state->signal_irq = platform_get_irq_byname(pdev, "signal"); state->wakeup_irq = platform_get_irq_byname(pdev, "wakeup"); + INIT_WORK(&state->work, debug_work); + spin_lock_init(&state->work_lock); + platform_set_drvdata(pdev, state); spin_lock_init(&state->sleep_timer_lock); @@ -1275,6 +1340,7 @@ static int fiq_debugger_probe(struct platform_device *pdev) handle_wakeup(state); #if defined(CONFIG_FIQ_DEBUGGER_CONSOLE) + spin_lock_init(&state->console_lock); state->console = fiq_debugger_console; state->console.index = pdev->id; if (!console_set_on_cmdline) diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 758cb9bdf5d..ba8c627a175 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -11,10 +11,9 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o -aes-arm-y := aes-armv4.o aes_glue.o aes-arm-y := aes-armv4.o aes_glue.o aes-arm-bs-y := aesbs-core.o aesbs-glue.o -sha1-arm-y := sha1-armv4-large.o sha1_glue.o +sha1-arm-y := sha1-armv4-large.o sha1_glue.o sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h index cf0c81fc93e..613affcd147 100644 --- a/arch/arm/include/asm/mmu.h +++ b/arch/arm/include/asm/mmu.h @@ -35,11 +35,4 @@ typedef struct { #endif -/* - * switch_mm() may do a full cache flush over the context switch, - * so enable interrupts over the context switch to avoid high - * latency. - */ -#define __ARCH_WANT_INTERRUPTS_ON_CTXSW - #endif diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h index e8946baa224..72706dc36d9 100644 --- a/arch/arm/include/asm/mmu_context.h +++ b/arch/arm/include/asm/mmu_context.h @@ -43,45 +43,104 @@ void __check_kvm_seq(struct mm_struct *mm); #define ASID_FIRST_VERSION (1 << ASID_BITS) extern unsigned int cpu_last_asid; -#ifdef CONFIG_SMP -DECLARE_PER_CPU(struct mm_struct *, current_mm); -#endif void __init_new_context(struct task_struct *tsk, struct mm_struct *mm); void __new_context(struct mm_struct *mm); +void cpu_set_reserved_ttbr0(void); -static inline void check_context(struct mm_struct *mm) +static inline void switch_new_context(struct mm_struct *mm) { - /* - * This code is executed with interrupts enabled. Therefore, - * mm->context.id cannot be updated to the latest ASID version - * on a different CPU (and condition below not triggered) - * without first getting an IPI to reset the context. The - * alternative is to take a read_lock on mm->context.id_lock - * (after changing its type to rwlock_t). - */ - if (unlikely((mm->context.id ^ cpu_last_asid) >> ASID_BITS)) - __new_context(mm); + unsigned long flags; + __new_context(mm); + + local_irq_save(flags); + cpu_switch_mm(mm->pgd, mm); + local_irq_restore(flags); +} + +static inline void check_and_switch_context(struct mm_struct *mm, + struct task_struct *tsk) +{ if (unlikely(mm->context.kvm_seq != init_mm.context.kvm_seq)) __check_kvm_seq(mm); + + /* + * Required during context switch to avoid speculative page table + * walking with the wrong TTBR. + */ + cpu_set_reserved_ttbr0(); + + if (!((mm->context.id ^ cpu_last_asid) >> ASID_BITS)) + /* + * The ASID is from the current generation, just switch to the + * new pgd. This condition is only true for calls from + * context_switch() and interrupts are already disabled. + */ + cpu_switch_mm(mm->pgd, mm); + else if (irqs_disabled()) + /* + * Defer the new ASID allocation until after the context + * switch critical region since __new_context() cannot be + * called with interrupts disabled (it sends IPIs). + */ + set_ti_thread_flag(task_thread_info(tsk), TIF_SWITCH_MM); + else + /* + * That is a direct call to switch_mm() or activate_mm() with + * interrupts enabled and a new context. 
+ */ + switch_new_context(mm); } #define init_new_context(tsk,mm) (__init_new_context(tsk,mm),0) -#else - -static inline void check_context(struct mm_struct *mm) +#define finish_arch_post_lock_switch \ + finish_arch_post_lock_switch +static inline void finish_arch_post_lock_switch(void) { + if (test_and_clear_thread_flag(TIF_SWITCH_MM)) + switch_new_context(current->mm); +} + +#else /* !CONFIG_CPU_HAS_ASID */ + #ifdef CONFIG_MMU + +static inline void check_and_switch_context(struct mm_struct *mm, + struct task_struct *tsk) +{ if (unlikely(mm->context.kvm_seq != init_mm.context.kvm_seq)) __check_kvm_seq(mm); -#endif + + if (irqs_disabled()) + /* + * cpu_switch_mm() needs to flush the VIVT caches. To avoid + * high interrupt latencies, defer the call and continue + * running with the old mm. Since we only support UP systems + * on non-ASID CPUs, the old mm will remain valid until the + * finish_arch_post_lock_switch() call. + */ + set_ti_thread_flag(task_thread_info(tsk), TIF_SWITCH_MM); + else + cpu_switch_mm(mm->pgd, mm); } +#define finish_arch_post_lock_switch \ + finish_arch_post_lock_switch +static inline void finish_arch_post_lock_switch(void) +{ + if (test_and_clear_thread_flag(TIF_SWITCH_MM)) { + struct mm_struct *mm = current->mm; + cpu_switch_mm(mm->pgd, mm); + } +} + +#endif /* CONFIG_MMU */ + #define init_new_context(tsk,mm) 0 -#endif +#endif /* CONFIG_CPU_HAS_ASID */ #define destroy_context(mm) do { } while(0) @@ -127,12 +186,7 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, __flush_icache_all(); #endif if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next) { -#ifdef CONFIG_SMP - struct mm_struct **crt_mm = &per_cpu(current_mm, cpu); - *crt_mm = next; -#endif - check_context(next); - cpu_switch_mm(next->pgd, next); + check_and_switch_context(next, tsk); #ifdef CONFIG_TIMA_RKP spin_lock_irqsave(&tima_switch_count_lock, flags); tima_switch_count++; diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h index 99b4bff8010..d2bb5a6b5eb 100644 --- a/arch/arm/include/asm/pgalloc.h +++ b/arch/arm/include/asm/pgalloc.h @@ -200,7 +200,15 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep) static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, pgtable_t ptep) { - __pmd_populate(pmdp, page_to_phys(ptep), _PAGE_USER_TABLE); + extern pmdval_t user_pmd_table; + pmdval_t prot; + + if (__LINUX_ARM_ARCH__ >= 6 && !IS_ENABLED(CONFIG_ARM_LPAE)) + prot = user_pmd_table; + else + prot = _PAGE_USER_TABLE; + + __pmd_populate(pmdp, page_to_phys(ptep), prot); } #define pmd_pgtable(pmd) pmd_page(pmd) diff --git a/arch/arm/include/asm/pgtable-2level-hwdef.h b/arch/arm/include/asm/pgtable-2level-hwdef.h index 5cfba15cb40..5e68278e953 100644 --- a/arch/arm/include/asm/pgtable-2level-hwdef.h +++ b/arch/arm/include/asm/pgtable-2level-hwdef.h @@ -20,12 +20,14 @@ #define PMD_TYPE_FAULT (_AT(pmdval_t, 0) << 0) #define PMD_TYPE_TABLE (_AT(pmdval_t, 1) << 0) #define PMD_TYPE_SECT (_AT(pmdval_t, 2) << 0) +#define PMD_PXNTABLE (_AT(pmdval_t, 1) << 2) /* v7 */ #define PMD_BIT4 (_AT(pmdval_t, 1) << 4) #define PMD_DOMAIN(x) (_AT(pmdval_t, (x)) << 5) #define PMD_PROTECTION (_AT(pmdval_t, 1) << 9) /* v5 */ /* * - section */ +#define PMD_SECT_PXN (_AT(pmdval_t, 1) << 0) /* v7 */ #define PMD_SECT_BUFFERABLE (_AT(pmdval_t, 1) << 2) #define PMD_SECT_CACHEABLE (_AT(pmdval_t, 1) << 3) #define PMD_SECT_XN (_AT(pmdval_t, 1) << 4) /* v6 */ diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index 
3806d885e8b..77d0d91b124 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -143,6 +143,7 @@ #define L_PTE_MT_DEV_NONSHARED (_AT(pteval_t, 0x0c) << 2) /* 1100 */ #define L_PTE_MT_DEV_WC (_AT(pteval_t, 0x09) << 2) /* 1001 */ #define L_PTE_MT_DEV_CACHED (_AT(pteval_t, 0x0b) << 2) /* 1011 */ +#define L_PTE_MT_VECTORS (_AT(pteval_t, 0x0f) << 2) /* 1111 */ #define L_PTE_MT_MASK (_AT(pteval_t, 0x0f) << 2) #ifndef __ASSEMBLY__ diff --git a/arch/arm/include/asm/pgtable-3level-hwdef.h b/arch/arm/include/asm/pgtable-3level-hwdef.h index d7952824c5c..790b7d61272 100644 --- a/arch/arm/include/asm/pgtable-3level-hwdef.h +++ b/arch/arm/include/asm/pgtable-3level-hwdef.h @@ -66,6 +66,7 @@ #define PTE_EXT_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */ #define PTE_EXT_AF (_AT(pteval_t, 1) << 10) /* Access Flag */ #define PTE_EXT_NG (_AT(pteval_t, 1) << 11) /* nG */ +#define PTE_EXT_PXN (_AT(pteval_t, 1) << 53) /* PXN */ #define PTE_EXT_XN (_AT(pteval_t, 1) << 54) /* XN */ /* diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index 7013a5c8fdd..664690fb874 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -155,6 +155,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, #define TIF_RESTORE_SIGMASK 20 #define TIF_SECCOMP 21 #define TIF_MM_RELEASED 22 /* task MM has been released */ +#define TIF_SWITCH_MM 23 /* deferred switch_mm */ + #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) @@ -162,7 +164,6 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) #define _TIF_USING_IWMMXT (1 << TIF_USING_IWMMXT) -#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_SYSCALL_RESTARTSYS (1 << TIF_SYSCALL_RESTARTSYS) @@ -173,7 +174,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, /* * Change these and you break ASM code in entry-common.S */ -#define _TIF_WORK_MASK 0x000000ff +#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | _TIF_NOTIFY_RESUME) #endif /* __KERNEL__ */ #endif /* __ASM_ARM_THREAD_INFO_H */ diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 57b765f52e3..ba4f0ccb9f5 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -50,19 +50,15 @@ ret_fast_syscall: fast_work_pending: str r0, [sp, #S_R0+S_OFF]! @ returned r0 work_pending: - tst r1, #_TIF_NEED_RESCHED - bne work_resched - tst r1, #_TIF_SIGPENDING|_TIF_NOTIFY_RESUME - beq no_work_pending mov r0, sp @ 'regs' mov r2, why @ 'syscall' - tst r1, #_TIF_SIGPENDING @ delivering a signal? - movne why, #0 @ prevent further restarts - bl do_notify_resume - b ret_slow_syscall @ Check work again + bl do_work_pending + cmp r0, #0 + beq no_work_pending + movlt scno, #(__NR_restart_syscall - __NR_SYSCALL_BASE) + ldmia sp, {r0 - r6} @ have to reload r0 - r6 + b local_restart @ ... and off we go -work_resched: - bl schedule /* * "slow" syscall return path. "why" tells us if this was a real syscall. 
*/ @@ -438,6 +434,7 @@ ENTRY(vector_swi) eor scno, scno, #__NR_SYSCALL_BASE @ check OS number #endif +local_restart: ldr r10, [tsk, #TI_FLAGS] @ check for syscall tracing stmdb sp!, {r4, r5} @ push fifth and sixth args @@ -475,7 +472,8 @@ __sys_trace: mov scno, r0 @ syscall number (possibly new) add r1, sp, #S_R0 + S_OFF @ pointer to regs cmp scno, #NR_syscalls @ check upper syscall limit - ldmccia r1, {r0 - r3} @ have to reload r0 - r3 + ldmccia r1, {r0 - r6} @ have to reload r0 - r6 + stmccia sp, {r4, r5} @ and update the stack args ldrcc pc, [tbl, scno, lsl #2] @ call sys_* routine b 2b diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index df0bf0c8cb7..34e56647dce 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -179,20 +179,21 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, old = *parent; *parent = return_hooker; + trace.func = self_addr; + trace.depth = current->curr_ret_stack + 1; + + /* Only trace if the calling function expects to */ + if (!ftrace_graph_entry(&trace)) { + *parent = old; + return; + } + err = ftrace_push_return_trace(old, self_addr, &trace.depth, frame_pointer); if (err == -EBUSY) { *parent = old; return; } - - trace.func = self_addr; - - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) { - current->curr_ret_stack--; - *parent = old; - } } #ifdef CONFIG_DYNAMIC_FTRACE diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c index 778c2f7024f..b321c8fbb87 100644 --- a/arch/arm/kernel/kgdb.c +++ b/arch/arm/kernel/kgdb.c @@ -144,6 +144,8 @@ int kgdb_arch_handle_exception(int exception_vector, int signo, static int kgdb_brk_fn(struct pt_regs *regs, unsigned int instr) { + if (user_mode(regs)) + return -1; kgdb_handle_exception(1, SIGTRAP, 0, regs); return 0; @@ -151,6 +153,8 @@ static int kgdb_brk_fn(struct pt_regs *regs, unsigned int instr) static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int instr) { + if (user_mode(regs)) + return -1; compiled_break = 1; kgdb_handle_exception(1, SIGTRAP, 0, regs); diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index b8fb09cb0bd..33058f33d2f 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -934,27 +935,7 @@ asmlinkage int syscall_trace(int why, struct pt_regs *regs, int scno) */ ip = regs->ARM_ip; regs->ARM_ip = why; - - /* - * IP is used to denote syscall entry/exit: - * IP = 0 -> entry, =1 -> exit - */ - ip = regs->ARM_ip; - regs->ARM_ip = why; - - /* the 0x80 provides a way for the tracing parent to distinguish - between a syscall stop and SIGTRAP delivery */ - ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) - ? 0x80 : 0)); - /* - * this isn't the same as continuing with a signal, but it will do - * for normal use. strace only continues with a signal if the - * stopping signal is not SIGTRAP. 
-brl - */ - if (current->exit_code) { - send_sig(current->exit_code, current, 1); - current->exit_code = 0; - } + ptrace_report_syscall(regs); regs->ARM_ip = ip; return current_thread_info()->syscall; diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index de697fbede2..34642df9ef8 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -21,7 +21,6 @@ #include #include #include -#include "signal.h" #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) @@ -30,7 +29,6 @@ */ #define SWI_SYS_SIGRETURN (0xef000000|(__NR_sigreturn)|(__NR_OABI_SYSCALL_BASE)) #define SWI_SYS_RT_SIGRETURN (0xef000000|(__NR_rt_sigreturn)|(__NR_OABI_SYSCALL_BASE)) -#define SWI_SYS_RESTART (0xef000000|__NR_restart_syscall|__NR_OABI_SYSCALL_BASE) /* * With EABI, the syscall number has to be loaded into r7. @@ -45,22 +43,12 @@ #define SWI_THUMB_SIGRETURN (0xdf00 << 16 | 0x2700 | (__NR_sigreturn - __NR_SYSCALL_BASE)) #define SWI_THUMB_RT_SIGRETURN (0xdf00 << 16 | 0x2700 | (__NR_rt_sigreturn - __NR_SYSCALL_BASE)) -const unsigned long sigreturn_codes[7] = { +static const unsigned long sigreturn_codes[7] = { MOV_R7_NR_SIGRETURN, SWI_SYS_SIGRETURN, SWI_THUMB_SIGRETURN, MOV_R7_NR_RT_SIGRETURN, SWI_SYS_RT_SIGRETURN, SWI_THUMB_RT_SIGRETURN, }; -/* - * Either we support OABI only, or we have EABI with the OABI - * compat layer enabled. In the later case we don't know if - * user space is EABI or not, and if not we must not clobber r7. - * Always using the OABI syscall solves that issue and works for - * all those cases. - */ -const unsigned long syscall_restart_code[2] = { - SWI_SYS_RESTART, /* swi __NR_restart_syscall */ - 0xe49df004, /* ldr pc, [sp], #4 */ -}; +static unsigned long signal_return_offset; /* * atomically swap in the new signal mask, and wait for a signal. @@ -92,10 +80,10 @@ sys_sigaction(int sig, const struct old_sigaction __user *act, old_sigset_t mask; if (!access_ok(VERIFY_READ, act, sizeof(*act)) || __get_user(new_ka.sa.sa_handler, &act->sa_handler) || - __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) + __get_user(new_ka.sa.sa_restorer, &act->sa_restorer) || + __get_user(new_ka.sa.sa_flags, &act->sa_flags) || + __get_user(mask, &act->sa_mask)) return -EFAULT; - __get_user(new_ka.sa.sa_flags, &act->sa_flags); - __get_user(mask, &act->sa_mask); siginitset(&new_ka.sa.sa_mask, mask); } @@ -104,10 +92,10 @@ sys_sigaction(int sig, const struct old_sigaction __user *act, if (!ret && oact) { if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || - __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) + __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer) || + __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || + __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) return -EFAULT; - __put_user(old_ka.sa.sa_flags, &oact->sa_flags); - __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); } return ret; @@ -622,21 +610,13 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, * the kernel can handle, and then we build all the user-level signal handling * stack-frames in one go after that. */ -static void do_signal(struct pt_regs *regs, int syscall) +static int do_signal(struct pt_regs *regs, int syscall) { unsigned int retval = 0, continue_addr = 0, restart_addr = 0; struct k_sigaction ka; siginfo_t info; int signr; - - /* - * We want the common case to go fast, which - * is why we may in certain cases get here from - * kernel mode. Just return without doing anything - * if so. 
- */ - if (!user_mode(regs)) - return; + int restart = 0; /* * If we were from a system call, check for system call restarting... @@ -651,33 +631,34 @@ static void do_signal(struct pt_regs *regs, int syscall) * debugger will see the already changed PSW. */ switch (retval) { + case -ERESTART_RESTARTBLOCK: + restart -= 2; case -ERESTARTNOHAND: case -ERESTARTSYS: case -ERESTARTNOINTR: - case -ERESTART_RESTARTBLOCK: + restart++; regs->ARM_r0 = regs->ARM_ORIG_r0; regs->ARM_pc = restart_addr; break; } } - if (try_to_freeze()) - goto no_signal; - /* * Get the signal to deliver. When running under ptrace, at this * point the debugger may change all our registers ... */ signr = get_signal_to_deliver(&info, &ka, regs, NULL); + /* + * Depending on the signal settings we may need to revert the + * decision to restart the system call. But skip this if a + * debugger has chosen to restart at a different PC. + */ + if (regs->ARM_pc != restart_addr) + restart = 0; if (signr > 0) { sigset_t *oldset; - /* - * Depending on the signal settings we may need to revert the - * decision to restart the system call. But skip this if a - * debugger has chosen to restart at a different PC. - */ - if (regs->ARM_pc == restart_addr) { + if (unlikely(restart)) { if (retval == -ERESTARTNOHAND || retval == -ERESTART_RESTARTBLOCK || (retval == -ERESTARTSYS @@ -702,10 +683,9 @@ static void do_signal(struct pt_regs *regs, int syscall) if (test_thread_flag(TIF_RESTORE_SIGMASK)) clear_thread_flag(TIF_RESTORE_SIGMASK); } - return; + return 0; } - no_signal: if (syscall) { /* * Handle restarting a different system call. As above, @@ -722,20 +702,43 @@ static void do_signal(struct pt_regs *regs, int syscall) */ if (test_and_clear_thread_flag(TIF_RESTORE_SIGMASK)) set_current_blocked(¤t->saved_sigmask); + if (unlikely(restart)) + regs->ARM_pc = continue_addr; + return restart; } -asmlinkage void -do_notify_resume(struct pt_regs *regs, unsigned int thread_flags, int syscall) +asmlinkage int +do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) { - if (thread_flags & _TIF_SIGPENDING) - do_signal(regs, syscall); - - if (thread_flags & _TIF_NOTIFY_RESUME) { - clear_thread_flag(TIF_NOTIFY_RESUME); - tracehook_notify_resume(regs); - if (current->replacement_session_keyring) - key_replace_session_keyring(); - } + do { + if (likely(thread_flags & _TIF_NEED_RESCHED)) { + schedule(); + } else { + if (unlikely(!user_mode(regs))) + return 0; + local_irq_enable(); + if (thread_flags & _TIF_SIGPENDING) { + int restart = do_signal(regs, syscall); + if (unlikely(restart)) { + /* + * Restart without handlers. + * Deal with it without leaving + * the kernel space. + */ + return restart; + } + syscall = 0; + } else { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + if (current->replacement_session_keyring) + key_replace_session_keyring(); + } + } + local_irq_disable(); + thread_flags = current_thread_info()->flags; + } while (thread_flags & _TIF_WORK_MASK); + return 0; } struct page *get_signal_page(void) diff --git a/arch/arm/kernel/signal.h b/arch/arm/kernel/signal.h deleted file mode 100644 index 6fcfe8398aa..00000000000 --- a/arch/arm/kernel/signal.h +++ /dev/null @@ -1,14 +0,0 @@ -/* - * linux/arch/arm/kernel/signal.h - * - * Copyright (C) 2005-2009 Russell King. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ -#define KERN_SIGRETURN_CODE (CONFIG_VECTORS_BASE + 0x00000500) -#define KERN_RESTART_CODE (KERN_SIGRETURN_CODE + sizeof(sigreturn_codes)) - -extern const unsigned long sigreturn_codes[7]; -extern const unsigned long syscall_restart_code[2]; diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 5cc0e124ef4..b83419aded1 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -36,7 +36,6 @@ #include #include -#include "signal.h" #ifdef CONFIG_SEC_DEBUG #include #endif diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 403345bf6af..d9d3650cb09 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -27,18 +27,32 @@ DEFINE_PER_CPU(struct mm_struct *, current_mm); #endif #ifdef CONFIG_ARM_LPAE -#define cpu_set_asid(asid) { \ - unsigned long ttbl, ttbh; \ - asm volatile( \ - " mrrc p15, 0, %0, %1, c2 @ read TTBR0\n" \ - " mov %1, %2, lsl #(48 - 32) @ set ASID\n" \ - " mcrr p15, 0, %0, %1, c2 @ set TTBR0\n" \ - : "=&r" (ttbl), "=&r" (ttbh) \ - : "r" (asid & ~ASID_MASK)); \ +void cpu_set_reserved_ttbr0(void) +{ + unsigned long ttbl = __pa(swapper_pg_dir); + unsigned long ttbh = 0; + + /* + * Set TTBR0 to swapper_pg_dir which contains only global entries. The + * ASID is set to 0. + */ + asm volatile( + " mcrr p15, 0, %0, %1, c2 @ set TTBR0\n" + : + : "r" (ttbl), "r" (ttbh)); + isb(); } #else -#define cpu_set_asid(asid) \ - asm(" mcr p15, 0, %0, c13, c0, 1\n" : : "r" (asid)) +void cpu_set_reserved_ttbr0(void) +{ + u32 ttb; + /* Copy TTBR1 into TTBR0 */ + asm volatile( + " mrc p15, 0, %0, c2, c0, 1 @ read TTBR1\n" + " mcr p15, 0, %0, c2, c0, 0 @ set TTBR0\n" + : "=r" (ttb)); + isb(); +} #endif static void write_contextidr(u32 contextidr) @@ -87,26 +101,11 @@ static int __init contextidr_notifier_init(void) return thread_register_notifier(&contextidr_notifier_block); } arch_initcall(contextidr_notifier_init); - -static void set_asid(unsigned int asid) -{ - u32 contextidr = read_contextidr(); - contextidr &= ASID_MASK; - contextidr |= asid & ~ASID_MASK; - write_contextidr(contextidr); -} -#else -static void set_asid(unsigned int asid) -{ - write_contextidr(asid); -} #endif /* * We fork()ed a process, and we need a new context for the child - * to run in. We reserve version 0 for initial tasks so we will - * always allocate an ASID. The ASID 0 is reserved for the TTBR - * register changing sequence. + * to run in. */ void __init_new_context(struct task_struct *tsk, struct mm_struct *mm) { @@ -116,8 +115,7 @@ void __init_new_context(struct task_struct *tsk, struct mm_struct *mm) static void flush_context(void) { - /* set the reserved ASID before flushing the TLB */ - set_asid(0); + cpu_set_reserved_ttbr0(); local_flush_tlb_all(); if (icache_is_vivt_asid_tagged()) { __flush_icache_all(); @@ -162,14 +160,7 @@ static void reset_context(void *info) { unsigned int asid; unsigned int cpu = smp_processor_id(); - struct mm_struct *mm = per_cpu(current_mm, cpu); - - /* - * Check if a current_mm was set on this CPU as it might still - * be in the early booting stages and using the reserved ASID. 
- */ - if (!mm) - return; + struct mm_struct *mm = current->active_mm; smp_rmb(); asid = cpu_last_asid + cpu + 1; @@ -178,7 +169,7 @@ static void reset_context(void *info) set_mm_context(mm, asid); /* set the new ASID */ - set_asid(mm->context.id); + cpu_switch_mm(mm->pgd, mm); } #else diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index c2a75b9cffc..9de6bfda61a 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -506,10 +506,10 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) local_irq_enable(); /* - * If we're in an interrupt or have no user + * If we're in an interrupt, have IRQs disabled, or have no user context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (in_atomic() || irqs_disabled() || !mm) goto no_context; /* diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index b2acc9a9fdb..a7b8d0868bd 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -282,7 +282,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm) if ((current->flags & PF_RANDOMIZE) && !(current->personality & ADDR_NO_RANDOMIZE)) - random_factor = (get_random_int() & ((1 << mmap_rnd_bits) - 1)) << PAGE_SHIFT; + random_factor = (get_random_long() & ((1UL << mmap_rnd_bits) - 1)) << PAGE_SHIFT; if (mmap_is_legacy()) { mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index dd9fc510585..7e47fcd0825 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -49,6 +49,8 @@ EXPORT_SYMBOL(empty_zero_page); */ pmd_t *top_pmd; +pmdval_t user_pmd_table = _PAGE_USER_TABLE; + #define CPOLICY_UNCACHED 0 #define CPOLICY_BUFFERED 1 #define CPOLICY_WRITETHROUGH 2 @@ -547,6 +549,25 @@ static void __init build_mem_type_table(void) } } +#ifndef CONFIG_ARM_LPAE + /* + * We don't use domains on ARMv6 (since this causes problems with + * v6/v7 kernels), so we must use a separate memory type for user + * r/o, kernel r/w to map the vectors page. + */ + if (cpu_arch == CPU_ARCH_ARMv6) + vecs_pgprot |= L_PTE_MT_VECTORS; + + /* + * Check whether the CPU supports the PXN bit + * in the Short-descriptor translation table format descriptors.
+ */ + if (cpu_arch == CPU_ARCH_ARMv7 && + (read_cpuid_ext(CPUID_EXT_MMFR0) & 0xF) >= 4) { + user_pmd_table |= PMD_PXNTABLE; + } +#endif + /* * Non-cacheable Normal - intended for memory areas that must * not cause dirty cache line writebacks when used @@ -576,6 +597,11 @@ static void __init build_mem_type_table(void) } kern_pgprot |= PTE_EXT_AF; vecs_pgprot |= PTE_EXT_AF; + + /* + * Set PXN for user mappings + */ + user_pgprot |= PTE_EXT_PXN; #endif for (i = 0; i < 16; i++) { diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S index 47798cdbecd..74dc5e60eb7 100644 --- a/arch/arm/mm/proc-v7-2level.S +++ b/arch/arm/mm/proc-v7-2level.S @@ -52,21 +52,13 @@ ENTRY(cpu_v7_switch_mm) #ifdef CONFIG_ARM_ERRATA_430973 mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB #endif -#ifdef CONFIG_ARM_ERRATA_754322 - dsb -#endif #ifdef CONFIG_PID_IN_CONTEXTIDR mrc p15, 0, r2, c13, c0, 1 @ read current context ID bic r2, r2, #0xff @ extract the PID and r1, r1, #0xff orr r1, r1, r2 @ insert the PID into r1 #endif - mcr p15, 0, r2, c13, c0, 1 @ set reserved context ID - isb -1: -#ifndef CONFIG_TIMA_RKP - mcr p15, 0, r0, c2, c0, 0 @ set TTB 0 -#else +#ifdef CONFIG_TIMA_RKP stmfd sp!, {r2, r11} /* TZ side expects r0 to be in r11 */ mov r11, r0 @@ -90,6 +82,8 @@ ENTRY(cpu_v7_switch_mm) #endif mcr p15, 0, r1, c13, c0, 1 @ set context ID isb + mcr p15, 0, r0, c2, c0, 0 @ set TTB 0 + isb #endif mov pc, lr ENDPROC(cpu_v7_switch_mm) diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c index 302d779d5b0..3b8700c3cdc 100644 --- a/arch/mips/mm/mmap.c +++ b/arch/mips/mm/mmap.c @@ -224,7 +224,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm) unsigned long random_factor = 0UL; if (current->flags & PF_RANDOMIZE) { - random_factor = get_random_int(); + random_factor = get_random_long(); random_factor = random_factor << PAGE_SHIFT; if (TASK_IS_32BIT_ADDR) random_factor &= 0xfffffful; @@ -245,7 +245,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm) static inline unsigned long brk_rnd(void) { - unsigned long rnd = get_random_int(); + unsigned long rnd = get_random_long(); rnd = rnd << PAGE_SHIFT; /* 8MB for 32bit, 256MB for 64bit */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 4937c969009..12aa614a76a 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1296,9 +1296,9 @@ static inline unsigned long brk_rnd(void) /* 8MB for 32bit, 1GB for 64bit */ if (is_32bit_task()) - rnd = (long)(get_random_int() % (1<<(23-PAGE_SHIFT))); + rnd = (get_random_long() % (1UL<<(23-PAGE_SHIFT))); else - rnd = (long)(get_random_int() % (1<<(30-PAGE_SHIFT))); + rnd = (get_random_long() % (1UL<<(30-PAGE_SHIFT))); return rnd << PAGE_SHIFT; } diff --git a/arch/powerpc/mm/mmap_64.c b/arch/powerpc/mm/mmap_64.c index 67a42ed0d2f..2803d90300a 100644 --- a/arch/powerpc/mm/mmap_64.c +++ b/arch/powerpc/mm/mmap_64.c @@ -60,9 +60,9 @@ static unsigned long mmap_rnd(void) if (current->flags & PF_RANDOMIZE) { /* 8MB for 32bit, 1GB for 64bit */ if (is_32bit_task()) - rnd = (long)(get_random_int() % (1<<(23-PAGE_SHIFT))); + rnd = get_random_long() % (1<<(23-PAGE_SHIFT)); else - rnd = (long)(get_random_int() % (1<<(30-PAGE_SHIFT))); + rnd = get_random_long() % (1UL<<(30-PAGE_SHIFT)); } return rnd << PAGE_SHIFT; } diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 3ee51f189a5..46b63be134b 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -366,7 +366,7 @@ static unsigned long mmap_rnd(void) unsigned long rnd = 
0UL; if (current->flags & PF_RANDOMIZE) { - unsigned long val = get_random_int(); + unsigned long val = get_random_long(); if (test_thread_flag(TIF_32BIT)) rnd = (val % (1UL << (23UL-PAGE_SHIFT))); else diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 948f0c9ebe5..5ef166ca817 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -82,7 +82,10 @@ config X86 select CLKEVT_I8253 select ARCH_HAVE_NMI_SAFE_CMPXCHG select GENERIC_IOMAP - select DCACHE_WORD_ACCESS + select DCACHE_WORD_ACCESS if !DEBUG_PAGEALLOC + select HAVE_ARCH_MMAP_RND_BITS if MMU + select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT + select HAVE_ARCH_SECCOMP_FILTER config INSTRUCTION_DECODER def_bool (KPROBES || PERF_EVENTS) @@ -126,6 +129,20 @@ config HAVE_LATENCYTOP_SUPPORT config MMU def_bool y +config ARCH_MMAP_RND_BITS_MIN + default 28 if 64BIT + default 8 + +config ARCH_MMAP_RND_BITS_MAX + default 32 if 64BIT + default 16 + +config ARCH_MMAP_RND_COMPAT_BITS_MIN + default 8 + +config ARCH_MMAP_RND_COMPAT_BITS_MAX + default 16 + config SBUS bool @@ -1261,10 +1278,6 @@ config HAVE_ARCH_BOOTMEM def_bool y depends on X86_32 && NUMA -config HAVE_ARCH_ALLOC_REMAP - def_bool y - depends on X86_32 && NUMA - config ARCH_HAVE_MEMORY_PRESENT def_bool y depends on X86_32 && DISCONTIGMEM diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 75f9e5d80d0..75b36cce24d 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -69,15 +69,15 @@ static unsigned long mmap_rnd(void) { unsigned long rnd = 0; - /* - * 8 bits of randomness in 32bit mmaps, 20 address space bits - * 28 bits of randomness in 64bit mmaps, 40 address space bits - */ if (current->flags & PF_RANDOMIZE) { if (mmap_is_ia32()) - rnd = get_random_int() % (1<<8); +#ifdef CONFIG_COMPAT + rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1); +#else + rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1); +#endif else - rnd = get_random_int() % (1<<28); + rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1); } return rnd << PAGE_SHIFT; } diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 73ce9fbe983..83aa694a8ef 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -38,11 +39,13 @@ ktime_t __start = ktime_get(); \ type __retval = GENPD_DEV_CALLBACK(genpd, type, callback, dev); \ s64 __elapsed = ktime_to_ns(ktime_sub(ktime_get(), __start)); \ - struct generic_pm_domain_data *__gpd_data = dev_gpd_data(dev); \ - if (__elapsed > __gpd_data->td.field) { \ - __gpd_data->td.field = __elapsed; \ + struct gpd_timing_data *__td = &dev_gpd_data(dev)->td; \ + if (!__retval && __elapsed > __td->field) { \ + __td->field = __elapsed; \ dev_warn(dev, name " latency exceeded, new value %lld ns\n", \ __elapsed); \ + genpd->max_off_time_changed = true; \ + __td->constraint_changed = true; \ } \ __retval; \ }) @@ -211,6 +214,7 @@ int __pm_genpd_poweron(struct generic_pm_domain *genpd) elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start)); if (elapsed_ns > genpd->power_on_latency_ns) { genpd->power_on_latency_ns = elapsed_ns; + genpd->max_off_time_changed = true; if (genpd->name) pr_warning("%s: Power-on latency exceeded, " "new value %lld ns\n", genpd->name, @@ -247,6 +251,53 @@ int pm_genpd_poweron(struct generic_pm_domain *genpd) #ifdef CONFIG_PM_RUNTIME +static int genpd_dev_pm_qos_notifier(struct notifier_block *nb, + unsigned long val, void *ptr) +{ + struct generic_pm_domain_data *gpd_data; + struct device *dev; + + gpd_data 
= container_of(nb, struct generic_pm_domain_data, nb); + + mutex_lock(&gpd_data->lock); + dev = gpd_data->base.dev; + if (!dev) { + mutex_unlock(&gpd_data->lock); + return NOTIFY_DONE; + } + mutex_unlock(&gpd_data->lock); + + for (;;) { + struct generic_pm_domain *genpd; + struct pm_domain_data *pdd; + + spin_lock_irq(&dev->power.lock); + + pdd = dev->power.subsys_data ? + dev->power.subsys_data->domain_data : NULL; + if (pdd) { + to_gpd_data(pdd)->td.constraint_changed = true; + genpd = dev_to_genpd(dev); + } else { + genpd = ERR_PTR(-ENODATA); + } + + spin_unlock_irq(&dev->power.lock); + + if (!IS_ERR(genpd)) { + mutex_lock(&genpd->lock); + genpd->max_off_time_changed = true; + mutex_unlock(&genpd->lock); + } + + dev = dev->parent; + if (!dev || dev->power.ignore_children) + break; + } + + return NOTIFY_DONE; +} + /** * __pm_genpd_save_device - Save the pre-suspend state of a device. * @pdd: Domain data of the device to save the state of. @@ -435,6 +486,7 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start)); if (elapsed_ns > genpd->power_off_latency_ns) { genpd->power_off_latency_ns = elapsed_ns; + genpd->max_off_time_changed = true; if (genpd->name) pr_warning("%s: Power-off latency exceeded, " "new value %lld ns\n", genpd->name, @@ -443,17 +495,6 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) } genpd->status = GPD_STATE_POWER_OFF; - genpd->power_off_time = ktime_get(); - - /* Update PM QoS information for devices in the domain. */ - list_for_each_entry_reverse(pdd, &genpd->dev_list, list_node) { - struct gpd_timing_data *td = &to_gpd_data(pdd)->td; - - pm_runtime_update_max_time_suspended(pdd->dev, - td->start_latency_ns + - td->restore_state_latency_ns + - genpd->power_on_latency_ns); - } list_for_each_entry(link, &genpd->slave_links, slave_node) { genpd_sd_counter_dec(link->master); @@ -514,9 +555,6 @@ static int pm_genpd_runtime_suspend(struct device *dev) if (ret) return ret; - pm_runtime_update_max_time_suspended(dev, - dev_gpd_data(dev)->td.start_latency_ns); - /* * If power.irq_safe is set, this routine will be run with interrupts * off, so it can't use mutexes. 
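The notifier above fires whenever a per-device PM QoS request changes. For context, a minimal sketch of the producer side, assuming the v3.4-era dev_pm_qos API (constraint values in microseconds) and a hypothetical driver-owned request object:

    #include <linux/pm_qos.h>

    static struct dev_pm_qos_request my_req;    /* hypothetical */

    static int my_driver_constrain(struct device *dev)
    {
        /* Cap the acceptable latency at 2000 us; adding or updating the
         * request runs the notifier chain, which is how
         * genpd_dev_pm_qos_notifier() learns that max_off_time must be
         * recomputed for the domain and its parents. */
        int ret = dev_pm_qos_add_request(dev, &my_req, 2000);

        if (ret < 0)
            return ret;
        /* Tighten it later; the notifier chain runs again. */
        return dev_pm_qos_update_request(&my_req, 500);
    }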
@@ -613,6 +651,12 @@ void pm_genpd_poweroff_unused(void) #else +static inline int genpd_dev_pm_qos_notifier(struct notifier_block *nb, + unsigned long val, void *ptr) +{ + return NOTIFY_DONE; +} + static inline void genpd_power_off_work_fn(struct work_struct *work) {} #define pm_genpd_runtime_suspend NULL @@ -1209,12 +1253,15 @@ int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(dev)) return -EINVAL; - genpd_acquire_lock(genpd); + gpd_data = kzalloc(sizeof(*gpd_data), GFP_KERNEL); + if (!gpd_data) + return -ENOMEM; - if (genpd->status == GPD_STATE_POWER_OFF) { - ret = -EINVAL; - goto out; - } + mutex_init(&gpd_data->lock); + gpd_data->nb.notifier_call = genpd_dev_pm_qos_notifier; + dev_pm_qos_add_notifier(dev, &gpd_data->nb); + + genpd_acquire_lock(genpd); if (genpd->prepared_count > 0) { ret = -EAGAIN; @@ -1227,26 +1274,35 @@ int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, goto out; } - gpd_data = kzalloc(sizeof(*gpd_data), GFP_KERNEL); - if (!gpd_data) { - ret = -ENOMEM; - goto out; - } - genpd->device_count++; + genpd->max_off_time_changed = true; - dev->pm_domain = &genpd->domain; dev_pm_get_subsys_data(dev); + + mutex_lock(&gpd_data->lock); + spin_lock_irq(&dev->power.lock); + dev->pm_domain = &genpd->domain; dev->power.subsys_data->domain_data = &gpd_data->base; gpd_data->base.dev = dev; - gpd_data->need_restore = false; list_add_tail(&gpd_data->base.list_node, &genpd->dev_list); + gpd_data->need_restore = genpd->status == GPD_STATE_POWER_OFF; if (td) gpd_data->td = *td; + gpd_data->td.constraint_changed = true; + gpd_data->td.effective_constraint_ns = -1; + spin_unlock_irq(&dev->power.lock); + mutex_unlock(&gpd_data->lock); + + genpd_release_lock(genpd); + + return 0; + out: genpd_release_lock(genpd); + dev_pm_qos_remove_notifier(dev, &gpd_data->nb); + kfree(gpd_data); return ret; } @@ -1290,12 +1346,15 @@ int __pm_genpd_of_add_device(struct device_node *genpd_node, struct device *dev, int pm_genpd_remove_device(struct generic_pm_domain *genpd, struct device *dev) { + struct generic_pm_domain_data *gpd_data; struct pm_domain_data *pdd; - int ret = -EINVAL; + int ret = 0; dev_dbg(dev, "%s()\n", __func__); - if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(dev)) + if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(dev) + || IS_ERR_OR_NULL(dev->pm_domain) + || pd_to_genpd(dev->pm_domain) != genpd) return -EINVAL; genpd_acquire_lock(genpd); @@ -1305,21 +1364,27 @@ int pm_genpd_remove_device(struct generic_pm_domain *genpd, goto out; } - list_for_each_entry(pdd, &genpd->dev_list, list_node) { - if (pdd->dev != dev) - continue; + genpd->device_count--; + genpd->max_off_time_changed = true; - list_del_init(&pdd->list_node); - pdd->dev = NULL; - dev_pm_put_subsys_data(dev); - dev->pm_domain = NULL; - kfree(to_gpd_data(pdd)); + spin_lock_irq(&dev->power.lock); + dev->pm_domain = NULL; + pdd = dev->power.subsys_data->domain_data; + list_del_init(&pdd->list_node); + dev->power.subsys_data->domain_data = NULL; + spin_unlock_irq(&dev->power.lock); - genpd->device_count--; + gpd_data = to_gpd_data(pdd); + mutex_lock(&gpd_data->lock); + pdd->dev = NULL; + mutex_unlock(&gpd_data->lock); - ret = 0; - break; - } + genpd_release_lock(genpd); + + dev_pm_qos_remove_notifier(dev, &gpd_data->nb); + kfree(gpd_data); + dev_pm_put_subsys_data(dev); + return 0; out: genpd_release_lock(genpd); @@ -1347,6 +1412,26 @@ void pm_genpd_dev_always_on(struct device *dev, bool val) } EXPORT_SYMBOL_GPL(pm_genpd_dev_always_on); 
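The reworked __pm_genpd_add_device() above now allocates gpd_data and registers the QoS notifier before taking the domain lock. A minimal caller-side sketch, with hypothetical platform callbacks and assuming the simple_qos_governor that domain_governor.c exports in this kernel:

    #include <linux/pm_domain.h>

    static int my_power_off(struct generic_pm_domain *d) { return 0; } /* stub */
    static int my_power_on(struct generic_pm_domain *d)  { return 0; } /* stub */

    static struct generic_pm_domain my_domain = {   /* hypothetical */
        .name = "my_domain",
        .power_off = my_power_off,
        .power_on = my_power_on,
    };

    static int my_platform_attach(struct device *dev)
    {
        /* Governor backed by default_stop_ok()/default_power_down_ok(). */
        pm_genpd_init(&my_domain, &simple_qos_governor, false);

        /* NULL timing data: the td fields start at zero and are grown by
         * the GENPD_DEV_TIMED_CALLBACK() measurements above. */
        return __pm_genpd_add_device(&my_domain, dev, NULL);
    }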
+/** + * pm_genpd_dev_need_restore - Set/unset the device's "need restore" flag. + * @dev: Device to set/unset the flag for. + * @val: The new value of the device's "need restore" flag. + */ +void pm_genpd_dev_need_restore(struct device *dev, bool val) +{ + struct pm_subsys_data *psd; + unsigned long flags; + + spin_lock_irqsave(&dev->power.lock, flags); + + psd = dev_to_psd(dev); + if (psd && psd->domain_data) + to_gpd_data(psd->domain_data)->need_restore = val; + + spin_unlock_irqrestore(&dev->power.lock, flags); +} +EXPORT_SYMBOL_GPL(pm_genpd_dev_need_restore); + /** * pm_genpd_add_subdomain - Add a subdomain to an I/O PM domain. * @genpd: Master PM domain to add the subdomain to. @@ -1378,7 +1463,7 @@ int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, goto out; } - list_for_each_entry(link, &genpd->slave_links, slave_node) { + list_for_each_entry(link, &genpd->master_links, master_node) { if (link->slave == subdomain && link->master == genpd) { ret = -EINVAL; goto out; @@ -1690,6 +1775,7 @@ void pm_genpd_init(struct generic_pm_domain *genpd, genpd->resume_count = 0; genpd->device_count = 0; genpd->max_off_time_ns = -1; + genpd->max_off_time_changed = true; genpd->domain.ops.runtime_suspend = pm_genpd_runtime_suspend; genpd->domain.ops.runtime_resume = pm_genpd_runtime_resume; genpd->domain.ops.runtime_idle = pm_generic_runtime_idle; diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c index 66a265bf586..28dee3053f1 100644 --- a/drivers/base/power/domain_governor.c +++ b/drivers/base/power/domain_governor.c @@ -14,6 +14,31 @@ #ifdef CONFIG_PM_RUNTIME +static int dev_update_qos_constraint(struct device *dev, void *data) +{ + s64 *constraint_ns_p = data; + s32 constraint_ns = -1; + + if (dev->power.subsys_data && dev->power.subsys_data->domain_data) + constraint_ns = dev_gpd_data(dev)->td.effective_constraint_ns; + + if (constraint_ns < 0) { + constraint_ns = dev_pm_qos_read_value(dev); + constraint_ns *= NSEC_PER_USEC; + } + if (constraint_ns == 0) + return 0; + + /* + * constraint_ns cannot be negative here, because the device has been + * suspended. + */ + if (constraint_ns < *constraint_ns_p || *constraint_ns_p == 0) + *constraint_ns_p = constraint_ns; + + return 0; +} + /** * default_stop_ok - Default PM domain governor routine for stopping devices. * @dev: Device to check. @@ -21,14 +46,52 @@ bool default_stop_ok(struct device *dev) { struct gpd_timing_data *td = &dev_gpd_data(dev)->td; + unsigned long flags; + s64 constraint_ns; dev_dbg(dev, "%s()\n", __func__); - if (dev->power.max_time_suspended_ns < 0 || td->break_even_ns == 0) - return true; + spin_lock_irqsave(&dev->power.lock, flags); - return td->stop_latency_ns + td->start_latency_ns < td->break_even_ns - && td->break_even_ns < dev->power.max_time_suspended_ns; + if (!td->constraint_changed) { + bool ret = td->cached_stop_ok; + + spin_unlock_irqrestore(&dev->power.lock, flags); + return ret; + } + td->constraint_changed = false; + td->cached_stop_ok = false; + td->effective_constraint_ns = -1; + constraint_ns = __dev_pm_qos_read_value(dev); + + spin_unlock_irqrestore(&dev->power.lock, flags); + + if (constraint_ns < 0) + return false; + + constraint_ns *= NSEC_PER_USEC; + /* + * We can walk the children without any additional locking, because + * they all have been suspended at this point and their + * effective_constraint_ns fields won't be modified in parallel with us. 
+ */ + if (!dev->power.ignore_children) + device_for_each_child(dev, &constraint_ns, + dev_update_qos_constraint); + + if (constraint_ns > 0) { + constraint_ns -= td->start_latency_ns; + if (constraint_ns == 0) + return false; + } + td->effective_constraint_ns = constraint_ns; + td->cached_stop_ok = constraint_ns > td->stop_latency_ns || + constraint_ns == 0; + /* + * The children have been suspended already, so we don't need to take + * their stop latencies into account here. + */ + return td->cached_stop_ok; } /** @@ -42,9 +105,27 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) struct generic_pm_domain *genpd = pd_to_genpd(pd); struct gpd_link *link; struct pm_domain_data *pdd; - s64 min_dev_off_time_ns; + s64 min_off_time_ns; s64 off_on_time_ns; - ktime_t time_now = ktime_get(); + + if (genpd->max_off_time_changed) { + struct gpd_link *link; + + /* + * We have to invalidate the cached results for the masters, so + * use the observation that default_power_down_ok() is not + * going to be called for any master until this instance + * returns. + */ + list_for_each_entry(link, &genpd->slave_links, slave_node) + link->master->max_off_time_changed = true; + + genpd->max_off_time_changed = false; + genpd->cached_power_down_ok = false; + genpd->max_off_time_ns = -1; + } else { + return genpd->cached_power_down_ok; + } off_on_time_ns = genpd->power_off_latency_ns + genpd->power_on_latency_ns; @@ -61,6 +142,7 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) to_gpd_data(pdd)->td.save_state_latency_ns; } + min_off_time_ns = -1; /* * Check if subdomains can be off for enough time. * @@ -73,8 +155,6 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) if (sd_max_off_ns < 0) continue; - sd_max_off_ns -= ktime_to_ns(ktime_sub(time_now, - sd->power_off_time)); /* * Check if the subdomain is allowed to be off long enough for * the current domain to turn off and on (that's how much time @@ -82,60 +162,64 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) */ if (sd_max_off_ns <= off_on_time_ns) return false; + + if (min_off_time_ns > sd_max_off_ns || min_off_time_ns < 0) + min_off_time_ns = sd_max_off_ns; } /* * Check if the devices in the domain can be off enough time. */ - min_dev_off_time_ns = -1; list_for_each_entry(pdd, &genpd->dev_list, list_node) { struct gpd_timing_data *td; - struct device *dev = pdd->dev; - s64 dev_off_time_ns; + s64 constraint_ns; - if (!dev->driver || dev->power.max_time_suspended_ns < 0) + if (!pdd->dev->driver) continue; + /* + * Check if the device is allowed to be off long enough for the + * domain to turn off and on (that's how much time it will + * have to wait worst case). + */ td = &to_gpd_data(pdd)->td; - dev_off_time_ns = dev->power.max_time_suspended_ns - - (td->start_latency_ns + td->restore_state_latency_ns + - ktime_to_ns(ktime_sub(time_now, - dev->power.suspend_time))); - if (dev_off_time_ns <= off_on_time_ns) + constraint_ns = td->effective_constraint_ns; + /* default_stop_ok() need not be called before us. */ + if (constraint_ns < 0) { + constraint_ns = dev_pm_qos_read_value(pdd->dev); + constraint_ns *= NSEC_PER_USEC; + } + if (constraint_ns == 0) + continue; + + /* + * constraint_ns cannot be negative here, because the device has + * been suspended. 
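For concreteness, a worked instance of the arithmetic above (illustrative numbers, all in nanoseconds once the microsecond QoS value has been scaled by NSEC_PER_USEC):

    QoS constraint read for the device:  2000 us  ->  constraint_ns = 2,000,000
    minus td->start_latency_ns (300,000)          ->  effective_constraint_ns = 1,700,000
    cached_stop_ok = 1,700,000 > td->stop_latency_ns (400,000)  ->  true

default_power_down_ok() then starts from the same 1,700,000 ns, subtracts td->restore_state_latency_ns, and requires the result to exceed off_on_time_ns, the domain's power-off plus power-on latency (plus the devices' save_state latencies).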
+ */ + constraint_ns -= td->restore_state_latency_ns; + if (constraint_ns <= off_on_time_ns) return false; - if (min_dev_off_time_ns > dev_off_time_ns - || min_dev_off_time_ns < 0) - min_dev_off_time_ns = dev_off_time_ns; + if (min_off_time_ns > constraint_ns || min_off_time_ns < 0) + min_off_time_ns = constraint_ns; } - if (min_dev_off_time_ns < 0) { - /* - * There are no latency constraints, so the domain can spend - * arbitrary time in the "off" state. - */ - genpd->max_off_time_ns = -1; + genpd->cached_power_down_ok = true; + + /* + * If the computed minimum device off time is negative, there are no + * latency constraints, so the domain can spend arbitrary time in the + * "off" state. + */ + if (min_off_time_ns < 0) return true; - } /* - * The difference between the computed minimum delta and the time needed - * to turn the domain on is the maximum theoretical time this domain can - * spend in the "off" state. + * The difference between the computed minimum subdomain or device off + * time and the time needed to turn the domain on is the maximum + * theoretical time this domain can spend in the "off" state. */ - min_dev_off_time_ns -= genpd->power_on_latency_ns; - - /* - * If the difference between the computed minimum delta and the time - * needed to turn the domain off and back on on is smaller than the - * domain's power break even time, removing power from the domain is not - * worth it. - */ - if (genpd->break_even_ns > - min_dev_off_time_ns - genpd->power_off_latency_ns) - return false; - - genpd->max_off_time_ns = min_dev_off_time_ns; + genpd->max_off_time_ns = min_off_time_ns - genpd->power_on_latency_ns; return true; } diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index df9c10426b4..2aec3579e85 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -55,10 +55,10 @@ struct suspend_stats suspend_stats; static DEFINE_MUTEX(dpm_list_mtx); static pm_message_t pm_transition; -static void dpm_drv_timeout(unsigned long data); -struct dpm_drv_wd_data { - struct device *dev; - struct task_struct *tsk; +struct dpm_watchdog { + struct device *dev; + struct task_struct *tsk; + struct timer_list timer; }; static int async_error; @@ -396,6 +396,56 @@ static int dpm_run_callback(pm_callback_t cb, struct device *dev, return error; } +/** + * dpm_wd_handler - Driver suspend / resume watchdog handler. + * + * Called when a driver has timed out suspending or resuming. + * There's not much we can do here to recover so BUG() out for + * a crash-dump + */ +static void dpm_wd_handler(unsigned long data) +{ + struct dpm_watchdog *wd = (void *)data; + struct device *dev = wd->dev; + struct task_struct *tsk = wd->tsk; + + dev_emerg(dev, "**** DPM device timeout ****\n"); + show_stack(tsk, NULL); + + BUG(); +} + +/** + * dpm_wd_set - Enable pm watchdog for given device. + * @wd: Watchdog. Must be allocated on the stack. + * @dev: Device to handle. + */ +static void dpm_wd_set(struct dpm_watchdog *wd, struct device *dev) +{ + struct timer_list *timer = &wd->timer; + + wd->dev = dev; + wd->tsk = get_current(); + + init_timer_on_stack(timer); + timer->expires = jiffies + HZ * 12; + timer->function = dpm_wd_handler; + timer->data = (unsigned long)wd; + add_timer(timer); +} + +/** + * dpm_wd_clear - Disable pm watchdog. + * @wd: Watchdog to disable. 
+ */ +static void dpm_wd_clear(struct dpm_watchdog *wd) +{ + struct timer_list *timer = &wd->timer; + + del_timer_sync(timer); + destroy_timer_on_stack(timer); +} + /*------------------------- Resume routines -------------------------*/ /** @@ -571,6 +621,7 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) pm_callback_t callback = NULL; char *info = NULL; int error = 0; + struct dpm_watchdog wd; TRACE_DEVICE(dev); TRACE_RESUME(0); @@ -583,6 +634,7 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) * a resumed device, even if the device hasn't been completed yet. */ dev->power.is_prepared = false; + dpm_wd_set(&wd, dev); if (!dev->power.is_suspended) goto Unlock; @@ -636,6 +688,7 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) Unlock: device_unlock(dev); + dpm_wd_clear(&wd); complete_all(&dev->power.completion); TRACE_RESUME(error); @@ -660,30 +713,6 @@ static bool is_async(struct device *dev) && !pm_trace_is_enabled(); } -/** - * dpm_drv_timeout - Driver suspend / resume watchdog handler - * @data: struct device which timed out - * - * Called when a driver has timed out suspending or resuming. - * There's not much we can do here to recover so - * BUG() out for a crash-dump - * - */ -static void dpm_drv_timeout(unsigned long data) -{ - struct dpm_drv_wd_data *wd_data = (void *)data; - struct device *dev = wd_data->dev; - struct task_struct *tsk = wd_data->tsk; - - printk(KERN_EMERG "**** DPM device timeout: %s (%s)\n", dev_name(dev), - (dev->driver ? dev->driver->name : "no driver")); - - printk(KERN_EMERG "dpm suspend stack:\n"); - show_stack(tsk, NULL); - - BUG(); -} - /** * dpm_resume - Execute "resume" callbacks for non-sysdev devices. * @state: PM transition of the system being carried out. @@ -1063,8 +1092,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) pm_callback_t callback = NULL; char *info = NULL; int error = 0; - struct timer_list timer; - struct dpm_drv_wd_data data; + struct dpm_watchdog wd; dpm_wait_for_children(dev, async); @@ -1085,13 +1113,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) goto Complete; } - data.dev = dev; - data.tsk = get_current(); - init_timer_on_stack(&timer); - timer.expires = jiffies + HZ * 12; - timer.function = dpm_drv_timeout; - timer.data = (unsigned long)&data; - add_timer(&timer); + dpm_wd_set(&wd, dev); device_lock(dev); @@ -1148,8 +1170,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) device_unlock(dev); - del_timer_sync(&timer); - destroy_timer_on_stack(&timer); + dpm_wd_clear(&wd); Complete: complete_all(&dev->power.completion); diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c index c365c93abe1..b95ebf2d56e 100644 --- a/drivers/base/power/qos.c +++ b/drivers/base/power/qos.c @@ -352,21 +352,26 @@ EXPORT_SYMBOL_GPL(dev_pm_qos_remove_request); * * Will register the notifier into a notification chain that gets called * upon changes to the target value for the device. + * + * If the device's constraints object doesn't exist when this routine is called, + * it will be created (or error code will be returned if that fails). */ int dev_pm_qos_add_notifier(struct device *dev, struct notifier_block *notifier) { - int retval = 0; + int ret = 0; mutex_lock(&dev_pm_qos_mtx); - /* Silently return if the constraints object is not present. 
*/ - if (dev->power.constraints) - retval = blocking_notifier_chain_register( - dev->power.constraints->notifiers, - notifier); + if (!dev->power.constraints) + ret = dev->power.power_state.event != PM_EVENT_INVALID ? + dev_pm_qos_constraints_allocate(dev) : -ENODEV; + + if (!ret) + ret = blocking_notifier_chain_register( + dev->power.constraints->notifiers, notifier); mutex_unlock(&dev_pm_qos_mtx); - return retval; + return ret; } EXPORT_SYMBOL_GPL(dev_pm_qos_add_notifier); diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index bb82b1817ae..b6e9d9b7982 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -282,47 +282,6 @@ static int rpm_callback(int (*cb)(struct device *), struct device *dev) return retval != -EACCES ? retval : -EIO; } -struct rpm_qos_data { - ktime_t time_now; - s64 constraint_ns; -}; - -/** - * rpm_update_qos_constraint - Update a given PM QoS constraint data. - * @dev: Device whose timing data to use. - * @data: PM QoS constraint data to update. - * - * Use the suspend timing data of @dev to update PM QoS constraint data pointed - * to by @data. - */ -static int rpm_update_qos_constraint(struct device *dev, void *data) -{ - struct rpm_qos_data *qos = data; - unsigned long flags; - s64 delta_ns; - int ret = 0; - - spin_lock_irqsave(&dev->power.lock, flags); - - if (dev->power.max_time_suspended_ns < 0) - goto out; - - delta_ns = dev->power.max_time_suspended_ns - - ktime_to_ns(ktime_sub(qos->time_now, dev->power.suspend_time)); - if (delta_ns <= 0) { - ret = -EBUSY; - goto out; - } - - if (qos->constraint_ns > delta_ns || qos->constraint_ns == 0) - qos->constraint_ns = delta_ns; - - out: - spin_unlock_irqrestore(&dev->power.lock, flags); - - return ret; -} - /** * rpm_suspend - Carry out runtime suspend of given device. * @dev: Device to suspend. @@ -349,7 +308,6 @@ static int rpm_suspend(struct device *dev, int rpmflags) { int (*callback)(struct device *); struct device *parent = NULL; - struct rpm_qos_data qos; int retval; trace_rpm_suspend(dev, rpmflags); @@ -444,38 +402,14 @@ static int rpm_suspend(struct device *dev, int rpmflags) goto out; } - qos.constraint_ns = __dev_pm_qos_read_value(dev); - if (qos.constraint_ns < 0) { - /* Negative constraint means "never suspend". */ + if (__dev_pm_qos_read_value(dev) < 0) { + /* Negative PM QoS constraint means "never suspend". */ retval = -EPERM; goto out; } - qos.constraint_ns *= NSEC_PER_USEC; - qos.time_now = ktime_get(); __update_runtime_status(dev, RPM_SUSPENDING); - if (!dev->power.ignore_children) { - if (dev->power.irq_safe) - spin_unlock(&dev->power.lock); - else - spin_unlock_irq(&dev->power.lock); - - retval = device_for_each_child(dev, &qos, - rpm_update_qos_constraint); - - if (dev->power.irq_safe) - spin_lock(&dev->power.lock); - else - spin_lock_irq(&dev->power.lock); - - if (retval) - goto fail; - } - - dev->power.suspend_time = qos.time_now; - dev->power.max_time_suspended_ns = qos.constraint_ns ? : -1; - if (dev->pm_domain) callback = dev->pm_domain->ops.runtime_suspend; else if (dev->type && dev->type->pm) @@ -529,8 +463,6 @@ static int rpm_suspend(struct device *dev, int rpmflags) fail: __update_runtime_status(dev, RPM_ACTIVE); - dev->power.suspend_time = ktime_set(0, 0); - dev->power.max_time_suspended_ns = -1; dev->power.deferred_resume = false; wake_up_all(&dev->power.wait_queue); @@ -705,9 +637,6 @@ static int rpm_resume(struct device *dev, int rpmflags) if (dev->power.no_callbacks) goto no_callback; /* Assume success. 
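With this change, dev_pm_qos_add_notifier() creates the constraints object on demand instead of silently registering nothing. A minimal consumer sketch (names hypothetical; the notifier receives the new aggregate constraint, in microseconds, as its value argument):

    #include <linux/notifier.h>
    #include <linux/pm_qos.h>

    static int my_qos_notify(struct notifier_block *nb,
                             unsigned long value, void *unused)
    {
        pr_info("new device PM QoS constraint: %lu us\n", value);
        return NOTIFY_DONE;
    }

    static struct notifier_block my_qos_nb = {
        .notifier_call = my_qos_notify,
    };

    static int my_register(struct device *dev)
    {
        return dev_pm_qos_add_notifier(dev, &my_qos_nb);
    }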
*/ - dev->power.suspend_time = ktime_set(0, 0); - dev->power.max_time_suspended_ns = -1; - __update_runtime_status(dev, RPM_RESUMING); if (dev->pm_domain) @@ -1370,9 +1299,6 @@ void pm_runtime_init(struct device *dev) setup_timer(&dev->power.suspend_timer, pm_suspend_timer_fn, (unsigned long)dev); - dev->power.suspend_time = ktime_set(0, 0); - dev->power.max_time_suspended_ns = -1; - init_waitqueue_head(&dev->power.wait_queue); } @@ -1390,28 +1316,3 @@ void pm_runtime_remove(struct device *dev) if (dev->power.irq_safe && dev->parent) pm_runtime_put_sync(dev->parent); } - -/** - * pm_runtime_update_max_time_suspended - Update device's suspend time data. - * @dev: Device to handle. - * @delta_ns: Value to subtract from the device's max_time_suspended_ns field. - * - * Update the device's power.max_time_suspended_ns field by subtracting - * @delta_ns from it. The resulting value of power.max_time_suspended_ns is - * never negative. - */ -void pm_runtime_update_max_time_suspended(struct device *dev, s64 delta_ns) -{ - unsigned long flags; - - spin_lock_irqsave(&dev->power.lock, flags); - - if (delta_ns > 0 && dev->power.max_time_suspended_ns > 0) { - if (dev->power.max_time_suspended_ns > delta_ns) - dev->power.max_time_suspended_ns -= delta_ns; - else - dev->power.max_time_suspended_ns = 0; - } - - spin_unlock_irqrestore(&dev->power.lock, flags); -} diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 37c6fd0b36d..3bc02faefc2 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -653,6 +653,31 @@ void pm_wakeup_event(struct device *dev, unsigned int msec) } EXPORT_SYMBOL_GPL(pm_wakeup_event); +static void print_active_wakeup_sources(void) +{ + struct wakeup_source *ws; + int active = 0; + struct wakeup_source *last_activity_ws = NULL; + + rcu_read_lock(); + list_for_each_entry_rcu(ws, &wakeup_sources, entry) { + if (ws->active) { + pr_info("active wakeup source: %s\n", ws->name); + active = 1; + } else if (!active && + (!last_activity_ws || + ktime_to_ns(ws->last_time) > + ktime_to_ns(last_activity_ws->last_time))) { + last_activity_ws = ws; + } + } + + if (!active && last_activity_ws) + pr_info("last active wakeup source: %s\n", + last_activity_ws->name); + rcu_read_unlock(); +} + /** * pm_wakeup_pending - Check if power transition in progress should be aborted. 
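Based on the pr_info() formats in print_active_wakeup_sources() above, which the pm_wakeup_pending() hook in the next hunk calls whenever a transition is aborted, an aborted suspend leaves lines of roughly this shape in the kernel log (source names illustrative):

    active wakeup source: gpio_input:gpio-keys
    active wakeup source: alarm

or, when nothing is still active, only the most recently active source:

    last active wakeup source: mmc0_detect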
* @@ -675,6 +700,10 @@ bool pm_wakeup_pending(void) events_check_enabled = !ret; } spin_unlock_irqrestore(&events_lock, flags); + + if (ret) + print_active_wakeup_sources(); + return ret; } diff --git a/drivers/base/sw_sync.c b/drivers/base/sw_sync.c index 14d3f39219b..b4d8529ee89 100644 --- a/drivers/base/sw_sync.c +++ b/drivers/base/sw_sync.c @@ -167,8 +167,13 @@ long sw_sync_ioctl_create_fence(struct sw_sync_timeline *obj, unsigned long arg) struct sync_fence *fence; struct sw_sync_create_fence_data data; - if (copy_from_user(&data, (void __user *)arg, sizeof(data))) - return -EFAULT; + if (fd < 0) + return fd; + + if (copy_from_user(&data, (void __user *)arg, sizeof(data))) { + err = -EFAULT; + goto err; + } pt = sw_sync_pt_create(obj, data.value); if (pt == NULL) { diff --git a/drivers/base/sync.c b/drivers/base/sync.c index 7e1ecf34c9a..cf4a1c3ef3b 100644 --- a/drivers/base/sync.c +++ b/drivers/base/sync.c @@ -78,13 +78,13 @@ static void sync_timeline_free(struct kref *kref) container_of(kref, struct sync_timeline, kref); unsigned long flags; - if (obj->ops->release_obj) - obj->ops->release_obj(obj); - spin_lock_irqsave(&sync_timeline_list_lock, flags); list_del(&obj->sync_timeline_list); spin_unlock_irqrestore(&sync_timeline_list_lock, flags); + if (obj->ops->release_obj) + obj->ops->release_obj(obj); + kfree(obj); } diff --git a/drivers/char/random.c b/drivers/char/random.c index 3edb71f8f26..364299bcc81 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1481,6 +1481,28 @@ unsigned int get_random_int(void) return ret; } +/* + * Same as get_random_int(), but returns unsigned long. + */ +unsigned long get_random_long(void) +{ + __u32 *hash; + unsigned long ret; + + if (arch_get_random_long(&ret)) + return ret; + + hash = get_cpu_var(get_random_int_hash); + + hash[0] += current->pid + jiffies + get_cycles(); + md5_transform(hash, random_int_secret); + ret = *(unsigned long *)hash; + put_cpu_var(get_random_int_hash); + + return ret; +} +EXPORT_SYMBOL(get_random_long); + /* * randomize_range() returns a start address such that * diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 15a6af82e94..1c5c9190df3 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -393,6 +393,8 @@ struct pl330_req { struct pl330_reqcfg *cfg; /* Pointer to first xfer in the request. 
*/ struct pl330_xfer *x; + /* Hook to attach to DMAC's list of reqs with due callback */ + struct list_head rqd; }; /* @@ -462,8 +464,6 @@ struct _pl330_req { /* Number of bytes taken to setup MC for the req */ u32 mc_len; struct pl330_req *r; - /* Hook to attach to DMAC's list of reqs with due callback */ - struct list_head rqd; }; /* ToBeDone for tasklet */ @@ -1686,7 +1686,7 @@ static void pl330_dotask(unsigned long data) /* Returns 1 if state was updated, 0 otherwise */ static int pl330_update(const struct pl330_info *pi) { - struct _pl330_req *rqdone; + struct pl330_req *rqdone, *tmp; struct pl330_dmac *pl330; unsigned long flags; void __iomem *regs; @@ -1753,7 +1753,10 @@ static int pl330_update(const struct pl330_info *pi) if (active == -1) /* Aborted */ continue; - rqdone = &thrd->req[active]; + /* Detach the req */ + rqdone = thrd->req[active].r; + thrd->req[active].r = NULL; + mark_free(thrd, active); /* Get going again ASAP */ @@ -1765,20 +1768,11 @@ static int pl330_update(const struct pl330_info *pi) } /* Now that we are in no hurry, do the callbacks */ - while (!list_empty(&pl330->req_done)) { - struct pl330_req *r; - - rqdone = container_of(pl330->req_done.next, - struct _pl330_req, rqd); - - list_del_init(&rqdone->rqd); - - /* Detach the req */ - r = rqdone->r; - rqdone->r = NULL; + list_for_each_entry_safe(rqdone, tmp, &pl330->req_done, rqd) { + list_del(&rqdone->rqd); spin_unlock_irqrestore(&pl330->lock, flags); - _callback(r, PL330_ERR_NONE); + _callback(rqdone, PL330_ERR_NONE); spin_lock_irqsave(&pl330->lock, flags); } diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index 01dd9a7daf7..04715efe2a3 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -735,6 +735,8 @@ static const char *keys[KEY_MAX + 1] = { [KEY_ALTERASE] = "AlternateErase", [KEY_CANCEL] = "Cancel", [KEY_BRIGHTNESSDOWN] = "BrightnessDown", [KEY_BRIGHTNESSUP] = "BrightnessUp", [KEY_MEDIA] = "Media", [KEY_UNKNOWN] = "Unknown", + [BTN_DPAD_UP] = "BtnDPadUp", [BTN_DPAD_DOWN] = "BtnDPadDown", + [BTN_DPAD_LEFT] = "BtnDPadLeft", [BTN_DPAD_RIGHT] = "BtnDPadRight", [BTN_0] = "Btn0", [BTN_1] = "Btn1", [BTN_2] = "Btn2", [BTN_3] = "Btn3", [BTN_4] = "Btn4", [BTN_5] = "Btn5", @@ -764,7 +766,8 @@ static const char *keys[KEY_MAX + 1] = { [BTN_TOOL_MOUSE] = "ToolMouse", [BTN_TOOL_LENS] = "ToolLens", [BTN_TOUCH] = "Touch", [BTN_STYLUS] = "Stylus", [BTN_STYLUS2] = "Stylus2", [BTN_TOOL_DOUBLETAP] = "ToolDoubleTap", - [BTN_TOOL_TRIPLETAP] = "ToolTripleTap", [BTN_GEAR_DOWN] = "WheelBtn", + [BTN_TOOL_TRIPLETAP] = "ToolTripleTap", [BTN_TOOL_QUADTAP] = "ToolQuadrupleTap", + [BTN_GEAR_DOWN] = "WheelBtn", [BTN_GEAR_UP] = "Gear up", [KEY_OK] = "Ok", [KEY_SELECT] = "Select", [KEY_GOTO] = "Goto", [KEY_CLEAR] = "Clear", [KEY_POWER2] = "Power2", @@ -819,6 +822,16 @@ static const char *keys[KEY_MAX + 1] = { [KEY_KBDILLUMDOWN] = "KbdIlluminationDown", [KEY_KBDILLUMUP] = "KbdIlluminationUp", [KEY_SWITCHVIDEOMODE] = "SwitchVideoMode", + [KEY_BUTTONCONFIG] = "ButtonConfig", + [KEY_TASKMANAGER] = "TaskManager", + [KEY_JOURNAL] = "Journal", + [KEY_CONTROLPANEL] = "ControlPanel", + [KEY_APPSELECT] = "AppSelect", + [KEY_SCREENSAVER] = "ScreenSaver", + [KEY_VOICECOMMAND] = "VoiceCommand", + [KEY_BRIGHTNESS_MIN] = "BrightnessMin", + [KEY_BRIGHTNESS_MAX] = "BrightnessMax", + [KEY_BRIGHTNESS_AUTO] = "BrightnessAuto", }; static const char *relatives[REL_MAX + 1] = { diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 5b3ca5ab1d8..d2dc1aee9bc 100644 --- a/drivers/hid/hid-input.c +++ 
b/drivers/hid/hid-input.c @@ -687,6 +687,13 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel case 0x06c: map_key_clear(KEY_YELLOW); break; case 0x06d: map_key_clear(KEY_ZOOM); break; + case 0x06f: map_key_clear(KEY_BRIGHTNESSUP); break; + case 0x070: map_key_clear(KEY_BRIGHTNESSDOWN); break; + case 0x072: map_key_clear(KEY_BRIGHTNESS_TOGGLE); break; + case 0x073: map_key_clear(KEY_BRIGHTNESS_MIN); break; + case 0x074: map_key_clear(KEY_BRIGHTNESS_MAX); break; + case 0x075: map_key_clear(KEY_BRIGHTNESS_AUTO); break; + case 0x082: map_key_clear(KEY_VIDEO_NEXT); break; case 0x083: map_key_clear(KEY_LAST); break; case 0x084: map_key_clear(KEY_ENTER); break; @@ -727,6 +734,7 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel case 0x0bf: map_key_clear(KEY_SLOW); break; case 0x0cd: map_key_clear(KEY_PLAYPAUSE); break; + case 0x0cf: map_key_clear(KEY_VOICECOMMAND); break; case 0x0e0: map_abs_clear(ABS_VOLUME); break; case 0x0e2: map_key_clear(KEY_MUTE); break; case 0x0e5: map_key_clear(KEY_BASSBOOST); break; @@ -734,6 +742,7 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel case 0x0ea: map_key_clear(KEY_VOLUMEDOWN); break; case 0x0f5: map_key_clear(KEY_SLOW); break; + case 0x181: map_key_clear(KEY_BUTTONCONFIG); break; case 0x182: map_key_clear(KEY_BOOKMARKS); break; case 0x183: map_key_clear(KEY_CONFIG); break; case 0x184: map_key_clear(KEY_WORDPROCESSOR); break; @@ -747,6 +756,8 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel case 0x18c: map_key_clear(KEY_VOICEMAIL); break; case 0x18d: map_key_clear(KEY_ADDRESSBOOK); break; case 0x18e: map_key_clear(KEY_CALENDAR); break; + case 0x18f: map_key_clear(KEY_TASKMANAGER); break; + case 0x190: map_key_clear(KEY_JOURNAL); break; case 0x191: map_key_clear(KEY_FINANCE); break; case 0x192: map_key_clear(KEY_CALC); break; case 0x193: map_key_clear(KEY_PLAYER); break; @@ -755,10 +766,16 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel case 0x199: map_key_clear(KEY_CHAT); break; case 0x19c: map_key_clear(KEY_LOGOFF); break; case 0x19e: map_key_clear(KEY_COFFEE); break; + case 0x19f: map_key_clear(KEY_CONTROLPANEL); break; + case 0x1a2: map_key_clear(KEY_APPSELECT); break; + case 0x1a3: map_key_clear(KEY_NEXT); break; + case 0x1a4: map_key_clear(KEY_PREVIOUS); break; case 0x1a6: map_key_clear(KEY_HELP); break; case 0x1a7: map_key_clear(KEY_DOCUMENTS); break; case 0x1ab: map_key_clear(KEY_SPELLCHECK); break; case 0x1ae: map_key_clear(KEY_KEYBOARD); break; + case 0x1b1: map_key_clear(KEY_SCREENSAVER); break; + case 0x1b4: map_key_clear(KEY_FILE); break; case 0x1b6: map_key_clear(KEY_IMAGES); break; case 0x1b7: map_key_clear(KEY_AUDIO); break; case 0x1b8: map_key_clear(KEY_VIDEO); break; @@ -1096,6 +1113,69 @@ unsigned int hidinput_count_leds(struct hid_device *hid) } EXPORT_SYMBOL_GPL(hidinput_count_leds); +static void hidinput_led_worker(struct work_struct *work) +{ + struct hid_device *hid = container_of(work, struct hid_device, + led_work); + struct hid_field *field; + struct hid_report *report; + int len; + __u8 *buf; + + field = hidinput_get_led_field(hid); + if (!field) + return; + + /* + * field->report is accessed unlocked regarding HID core. So there might + * be another incoming SET-LED request from user-space, which changes + * the LED state while we assemble our outgoing buffer. 
However, this + * doesn't matter as hid_output_report() correctly converts it into a + * boolean value no matter what information is currently set on the LED + * field (even garbage). So the remote device will always get a valid + * request. + * And in case we send a wrong value, a next led worker is spawned + * for every SET-LED request so the following worker will send the + * correct value, guaranteed! + */ + + report = field->report; + + len = ((report->size - 1) >> 3) + 1 + (report->id > 0); + buf = kmalloc(len, GFP_KERNEL); + if (!buf) + return; + + hid_output_report(report, buf); + /* synchronous output report */ + hid->hid_output_raw_report(hid, buf, len, HID_OUTPUT_REPORT); + kfree(buf); +} + +static int hidinput_input_event(struct input_dev *dev, unsigned int type, + unsigned int code, int value) +{ + struct hid_device *hid = input_get_drvdata(dev); + struct hid_field *field; + int offset; + + if (type == EV_FF) + return input_ff_event(dev, type, code, value); + + if (type != EV_LED) + return -1; + + if ((offset = hidinput_find_field(hid, type, code, &field)) == -1) { + hid_warn(dev, "event field not found\n"); + return -1; + } + + hid_set_field(field, offset, value); + + schedule_work(&hid->led_work); + return 0; +} + static int hidinput_open(struct input_dev *dev) { struct hid_device *hid = input_get_drvdata(dev); @@ -1150,6 +1230,7 @@ int hidinput_connect(struct hid_device *hid, unsigned int force) int i, j, k; INIT_LIST_HEAD(&hid->inputs); + INIT_WORK(&hid->led_work, hidinput_led_worker); if (!force) { for (i = 0; i < hid->maxcollection; i++) { @@ -1187,8 +1268,13 @@ int hidinput_connect(struct hid_device *hid, unsigned int force) } input_set_drvdata(input_dev, hid); - input_dev->event = - hid->ll_driver->hidinput_input_event; + + if(hid->ll_driver->hidinput_input_event) { + input_dev->event = + hid->ll_driver->hidinput_input_event; + } else if (hid->hid_output_raw_report) { + input_dev->event = hidinput_input_event; + } input_dev->open = hidinput_open; input_dev->close = hidinput_close; input_dev->setkeycode = hidinput_setkeycode; @@ -1267,6 +1353,12 @@ void hidinput_disconnect(struct hid_device *hid) input_unregister_device(hidinput->input); kfree(hidinput); } + + /* led_work is spawned by input_dev callbacks, but doesn't access the + * parent input_dev at all. Once all input devices are removed, we + * know that led_work will never get restarted, so we can cancel it + * synchronously and are safe. 
*/ + cancel_work_sync(&hid->led_work); } EXPORT_SYMBOL_GPL(hidinput_disconnect); diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index 419b1f0944f..54f9de431af 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -37,6 +37,8 @@ struct evdev { struct mutex mutex; struct device dev; bool exist; + int hw_ts_sec; + int hw_ts_nsec; }; struct evdev_client { @@ -109,7 +111,20 @@ static void evdev_event(struct input_handle *handle, struct input_event event; ktime_t time_mono, time_real; - time_mono = ktime_get(); + if (type == EV_SYN && code == SYN_TIME_SEC) { + evdev->hw_ts_sec = value; + return; + } + if (type == EV_SYN && code == SYN_TIME_NSEC) { + evdev->hw_ts_nsec = value; + return; + } + + if (evdev->hw_ts_sec != -1 && evdev->hw_ts_nsec != -1) + time_mono = ktime_set(evdev->hw_ts_sec, evdev->hw_ts_nsec); + else + time_mono = ktime_get(); + time_real = ktime_sub(time_mono, ktime_get_monotonic_offset()); event.type = type; @@ -128,8 +143,11 @@ static void evdev_event(struct input_handle *handle, rcu_read_unlock(); - if (type == EV_SYN && code == SYN_REPORT) + if (type == EV_SYN && code == SYN_REPORT) { + evdev->hw_ts_sec = -1; + evdev->hw_ts_nsec = -1; wake_up_interruptible(&evdev->wait); + } } static int evdev_fasync(int fd, struct file *file, int on) @@ -1062,6 +1080,8 @@ static int evdev_connect(struct input_handler *handler, struct input_dev *dev, dev_set_name(&evdev->dev, "event%d", minor); evdev->exist = true; evdev->minor = minor; + evdev->hw_ts_sec = -1; + evdev->hw_ts_nsec = -1; evdev->handle.dev = input_get_device(dev); evdev->handle.name = dev_name(&evdev->dev); diff --git a/drivers/input/input.c b/drivers/input/input.c index 9c692c4e6ca..fd8c0427dee 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -223,6 +223,8 @@ static void input_handle_event(struct input_dev *dev, case EV_SYN: switch (code) { case SYN_CONFIG: + case SYN_TIME_SEC: + case SYN_TIME_NSEC: disposition = INPUT_PASS_TO_ALL; break; diff --git a/drivers/input/misc/gpio_input.c b/drivers/input/misc/gpio_input.c index e59b79e34b1..4dcad7d16ab 100644 --- a/drivers/input/misc/gpio_input.c +++ b/drivers/input/misc/gpio_input.c @@ -20,7 +20,7 @@ #include #include #include -#include <linux/wakelock.h> +#include <linux/pm_wakeup.h> enum { DEBOUNCE_UNSTABLE = BIT(0), /* Got irq, while debouncing */ @@ -46,7 +46,7 @@ struct gpio_input_state { int debounce_count; int is_removing; spinlock_t irq_lock; - struct wake_lock wake_lock; + struct wakeup_source *ws; struct gpio_key_state key_state[0]; }; @@ -158,7 +158,7 @@ static enum hrtimer_restart gpio_event_input_timer_func(struct hrtimer *timer) else if (!ds->use_irq && !ds->is_removing) hrtimer_start(timer, ds->info->poll_time, HRTIMER_MODE_REL); else - wake_unlock(&ds->wake_lock); + __pm_relax(ds->ws); spin_unlock_irqrestore(&ds->irq_lock, irqflags); @@ -184,7 +184,7 @@ static irqreturn_t gpio_event_input_irq_handler(int irq, void *dev_id) if (ks->debounce & DEBOUNCE_WAIT_IRQ) { ks->debounce = DEBOUNCE_UNKNOWN; if (ds->debounce_count++ == 0) { - wake_lock(&ds->wake_lock); + __pm_stay_awake(ds->ws); hrtimer_start( &ds->timer, ds->info->debounce_time, HRTIMER_MODE_REL); @@ -267,6 +267,7 @@ int gpio_event_input_func(struct gpio_event_input_devs *input_devs, unsigned long irqflags; struct gpio_event_input_info *di; struct gpio_input_state *ds = *data; + char *wlname; di = container_of(info, struct gpio_event_input_info, info); @@ -302,7 +303,19 @@ int gpio_event_input_func(struct gpio_event_input_devs *input_devs, ds->debounce_count = di->keymap_size; ds->input_devs = input_devs;
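The gpio_input conversion above is a mechanical wakelock-to-wakeup-source translation; the same mapping recurs in the power_supply changes later in this patch. A minimal sketch of the replacement API (source name hypothetical):

    #include <linux/pm_wakeup.h>

    static int my_setup(void)
    {
        struct wakeup_source *ws;

        ws = wakeup_source_register("my_source"); /* was wake_lock_init() */
        if (!ws)
            return -ENOMEM;
        __pm_stay_awake(ws);            /* was wake_lock() */
        __pm_relax(ws);                 /* was wake_unlock() */
        wakeup_source_unregister(ws);   /* was wake_lock_destroy() */
        return 0;
    }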
ds->info = di; - wake_lock_init(&ds->wake_lock, WAKE_LOCK_SUSPEND, "gpio_input"); + wlname = kasprintf(GFP_KERNEL, "gpio_input:%s%s", + input_devs->dev[0]->name, + (input_devs->count > 1) ? "..." : ""); + + ds->ws = wakeup_source_register(wlname); + kfree(wlname); + if (!ds->ws) { + ret = -ENOMEM; + pr_err("gpio_event_input_func: " + "Failed to allocate wakeup source\n"); + goto err_ws_failed; + } + spin_lock_init(&ds->irq_lock); for (i = 0; i < di->keymap_size; i++) { @@ -376,7 +389,8 @@ err_gpio_request_failed: ; } err_bad_keymap: - wake_lock_destroy(&ds->wake_lock); + wakeup_source_unregister(ds->ws); +err_ws_failed: kfree(ds); err_ds_alloc_failed: return ret; diff --git a/drivers/input/misc/keychord.c b/drivers/input/misc/keychord.c index 3ffab6da411..a5ea27ad0e1 100644 --- a/drivers/input/misc/keychord.c +++ b/drivers/input/misc/keychord.c @@ -126,8 +126,12 @@ static void keychord_event(struct input_handle *handle, unsigned int type, done: spin_unlock_irqrestore(&kdev->lock, flags); - if (got_chord) + if (got_chord) { + pr_info("keychord: got keychord id %d. Any tasks: %d\n", + keychord->id, + !list_empty_careful(&kdev->waitq.task_list)); wake_up_interruptible(&kdev->waitq); + } } static int keychord_connect(struct input_handler *handler, diff --git a/drivers/leds/led-triggers.c b/drivers/leds/led-triggers.c index 46b4c766335..a85ce094c4b 100644 --- a/drivers/leds/led-triggers.c +++ b/drivers/leds/led-triggers.c @@ -102,6 +102,12 @@ EXPORT_SYMBOL_GPL(led_trigger_show); void led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trigger) { unsigned long flags; + char *event = NULL; + char *envp[2]; + const char *name; + + name = trigger ? trigger->name : "none"; + event = kasprintf(GFP_KERNEL, "TRIGGER=%s", name); /* Remove any existing trigger */ if (led_cdev->trigger) { @@ -122,6 +128,13 @@ void led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trigger) if (trigger->activate) trigger->activate(led_cdev); } + + if (event) { + envp[0] = event; + envp[1] = NULL; + kobject_uevent_env(&led_cdev->dev->kobj, KOBJ_CHANGE, envp); + kfree(event); + } } EXPORT_SYMBOL_GPL(led_trigger_set); diff --git a/drivers/misc/uid_stat.c b/drivers/misc/uid_stat.c index 2141124a6c1..509822c81e9 100644 --- a/drivers/misc/uid_stat.c +++ b/drivers/misc/uid_stat.c @@ -38,17 +38,13 @@ struct uid_stat { }; static struct uid_stat *find_uid_stat(uid_t uid) { - unsigned long flags; struct uid_stat *entry; - spin_lock_irqsave(&uid_lock, flags); list_for_each_entry(entry, &uid_list, link) { if (entry->uid == uid) { - spin_unlock_irqrestore(&uid_lock, flags); return entry; } } - spin_unlock_irqrestore(&uid_lock, flags); return NULL; } @@ -90,13 +86,10 @@ static int tcp_rcv_read_proc(char *page, char **start, off_t off, /* Create a new entry for tracking the specified uid. */ static struct uid_stat *create_stat(uid_t uid) { - unsigned long flags; - char uid_s[32]; struct uid_stat *new_uid; - struct proc_dir_entry *entry; - /* Create the uid stat struct and append it to the list. 
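The led_trigger_set() change above emits a CHANGE uevent carrying the new trigger name, so userspace can react without polling the sysfs trigger file. Switching a trigger, for example:

    # echo heartbeat > /sys/class/leds/<led>/trigger

would produce a uevent whose environment roughly contains (standard variables plus the new one; values illustrative):

    ACTION=change
    SUBSYSTEM=leds
    TRIGGER=heartbeat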
*/ - if ((new_uid = kmalloc(sizeof(struct uid_stat), GFP_KERNEL)) == NULL) + new_uid = kmalloc(sizeof(struct uid_stat), GFP_ATOMIC); + if (!new_uid) return NULL; new_uid->uid = uid; @@ -104,11 +97,15 @@ static struct uid_stat *create_stat(uid_t uid) { atomic_set(&new_uid->tcp_rcv, INT_MIN); atomic_set(&new_uid->tcp_snd, INT_MIN); - spin_lock_irqsave(&uid_lock, flags); list_add_tail(&new_uid->link, &uid_list); - spin_unlock_irqrestore(&uid_lock, flags); + return new_uid; +} - sprintf(uid_s, "%d", uid); +static void create_stat_proc(struct uid_stat *new_uid) +{ + char uid_s[32]; + struct proc_dir_entry *entry; + sprintf(uid_s, "%d", new_uid->uid); entry = proc_mkdir(uid_s, parent); /* Keep reference to uid_stat so we know what uid to read stats from. */ @@ -117,17 +114,31 @@ static struct uid_stat *create_stat(uid_t uid) { create_proc_read_entry("tcp_rcv", S_IRUGO, entry, tcp_rcv_read_proc, (void *) new_uid); +} - return new_uid; +static struct uid_stat *find_or_create_uid_stat(uid_t uid) +{ + struct uid_stat *entry; + unsigned long flags; + spin_lock_irqsave(&uid_lock, flags); + entry = find_uid_stat(uid); + if (entry) { + spin_unlock_irqrestore(&uid_lock, flags); + return entry; + } + entry = create_stat(uid); + spin_unlock_irqrestore(&uid_lock, flags); + if (entry) + create_stat_proc(entry); + return entry; } int uid_stat_tcp_snd(uid_t uid, int size) { struct uid_stat *entry; activity_stats_update(); - if ((entry = find_uid_stat(uid)) == NULL && - ((entry = create_stat(uid)) == NULL)) { - return -1; - } + entry = find_or_create_uid_stat(uid); + if (!entry) + return -1; atomic_add(size, &entry->tcp_snd); return 0; } @@ -135,10 +146,9 @@ int uid_stat_tcp_snd(uid_t uid, int size) { int uid_stat_tcp_rcv(uid_t uid, int size) { struct uid_stat *entry; activity_stats_update(); - if ((entry = find_uid_stat(uid)) == NULL && - ((entry = create_stat(uid)) == NULL)) { - return -1; - } + entry = find_or_create_uid_stat(uid); + if (!entry) + return -1; atomic_add(size, &entry->tcp_rcv); return 0; } diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index e87ebb0db69..af6b5376c57 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -38,6 +38,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include <trace/events/mmc.h> + #include #include #include @@ -1592,9 +1595,11 @@ static int mmc_blk_issue_sanitize_rq(struct mmc_queue *mq, pr_debug("%s: %s - SANITIZE IN PROGRESS...\n", mmc_hostname(card->host), __func__); + trace_mmc_blk_erase_start(EXT_CSD_SANITIZE_START, 0, 0); err = mmc_switch_ignore_timeout(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_SANITIZE_START, 1, MMC_SANITIZE_REQ_TIMEOUT); + trace_mmc_blk_erase_end(EXT_CSD_SANITIZE_START, 0, 0); if (err) pr_err("%s: %s - mmc_switch() with " diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 1cfb7e9b2f6..261667b0173 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -52,7 +52,6 @@ #define ST_LOG(fmt,...)
#endif -#define CREATE_TRACE_POINTS #include <trace/events/mmc.h> static void mmc_clk_scaling(struct mmc_host *host, bool from_wq); @@ -273,6 +272,7 @@ void mmc_request_done(struct mmc_host *host, struct mmc_request *mrq) pr_debug("%s: %d bytes transferred: %d\n", mmc_hostname(host), mrq->data->bytes_xfered, mrq->data->error); + trace_mmc_blk_rw_end(cmd->opcode, cmd->arg, mrq->data); } if (mrq->stop) { @@ -926,6 +926,9 @@ struct mmc_async_req *mmc_start_req(struct mmc_host *host, /* Prepare a new request */ if (areq) { + trace_mmc_blk_rw_start(areq->mrq->cmd->opcode, + areq->mrq->cmd->arg, + areq->mrq->data); /* * start waiting here for possible interrupt * because mmc_pre_req() taking long time @@ -2310,10 +2313,15 @@ static int mmc_do_erase(struct mmc_card *card, unsigned int from, struct mmc_command cmd = {0}; unsigned int qty = 0; unsigned long timeout; + unsigned int fr, nr; int err; u32 *resp = card->raw_csd; + fr = from; + nr = to - from + 1; + trace_mmc_blk_erase_start(arg, fr, nr); + /* For WriteProtection */ if (UNSTUFF_BITS(resp, 12, 2)) { printk(KERN_ERR "eMMC set Write Protection mode, Can't be written or erased."); @@ -2431,6 +2439,8 @@ static int mmc_do_erase(struct mmc_card *card, unsigned int from, } while (!(cmd.resp[0] & R1_READY_FOR_DATA) || (R1_CURRENT_STATE(cmd.resp[0]) == R1_STATE_PRG)); out: + + trace_mmc_blk_erase_end(arg, fr, nr); return err; } diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index e22e890f8be..3caa74d4af7 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -795,7 +795,7 @@ void mmc_free_host(struct mmc_host *host) idr_remove(&mmc_host_idr, host->index); spin_unlock(&mmc_host_lock); wake_lock_destroy(&host->detect_wake_lock); - + kfree(host->wlock_name); put_device(&host->class_dev); } diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index 65f2ba7e586..fda4379a1ff 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -1348,4 +1348,4 @@ void sdio_ctrl_power(struct mmc_host *host, bool onoff) mmc_release_host(host); } EXPORT_SYMBOL(sdio_ctrl_power); -#endif /* CONFIG_BCM4339 || CONFIG_BCM4335 || CONFIG_BCM4354 */ \ No newline at end of file +#endif /* CONFIG_BCM4339 || CONFIG_BCM4335 || CONFIG_BCM4354 */ diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 1c01cd52b6c..c4b89ef1797 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -2880,6 +2880,9 @@ ppp_disconnect_channel(struct channel *pch) */ static void ppp_destroy_channel(struct channel *pch) { + put_net(pch->chan_net); + pch->chan_net = NULL; + atomic_dec(&channel_count); if (!pch->file.dead) { diff --git a/drivers/power/power_supply_core.c b/drivers/power/power_supply_core.c index f10e1d45b5a..b1e746f9b54 100644 --- a/drivers/power/power_supply_core.c +++ b/drivers/power/power_supply_core.c @@ -225,7 +225,7 @@ static void power_supply_changed_work(struct work_struct *work) spin_lock_irqsave(&psy->changed_lock, flags); } if (!psy->changed) - wake_unlock(&psy->work_wake_lock); + pm_relax(psy->dev); spin_unlock_irqrestore(&psy->changed_lock, flags); } @@ -237,7 +237,7 @@ void power_supply_changed(struct power_supply *psy) spin_lock_irqsave(&psy->changed_lock, flags); psy->changed = true; - wake_lock(&psy->work_wake_lock); + pm_stay_awake(psy->dev); spin_unlock_irqrestore(&psy->changed_lock, flags); schedule_work(&psy->changed_work); } @@ -379,7 +379,9 @@ int power_supply_register(struct device *parent, struct power_supply *psy) goto device_add_failed; spin_lock_init(&psy->changed_lock); -
wake_lock_init(&psy->work_wake_lock, WAKE_LOCK_SUSPEND, "power-supply"); + rc = device_init_wakeup(dev, true); + if (rc) + goto wakeup_init_failed; rc = power_supply_create_triggers(psy); if (rc) @@ -390,7 +392,7 @@ int power_supply_register(struct device *parent, struct power_supply *psy) goto success; create_triggers_failed: - wake_lock_destroy(&psy->work_wake_lock); +wakeup_init_failed: device_del(dev); kobject_set_name_failed: device_add_failed: @@ -405,7 +407,6 @@ void power_supply_unregister(struct power_supply *psy) cancel_work_sync(&psy->changed_work); sysfs_remove_link(&psy->dev->kobj, "powers"); power_supply_remove_triggers(psy); - wake_lock_destroy(&psy->work_wake_lock); device_unregister(psy->dev); } EXPORT_SYMBOL_GPL(power_supply_unregister); diff --git a/drivers/power/smb347-charger.c b/drivers/power/smb347-charger.c index ce1694d1a36..fb56ec755ce 100644 --- a/drivers/power/smb347-charger.c +++ b/drivers/power/smb347-charger.c @@ -22,6 +22,7 @@ #include #include #include +#include /* * Configuration registers. These are mirrored to volatile RAM and can be @@ -38,14 +39,20 @@ #define CFG_CURRENT_LIMIT_DC_MASK 0xf0 #define CFG_CURRENT_LIMIT_DC_SHIFT 4 #define CFG_CURRENT_LIMIT_USB_MASK 0x0f +#define CFG_VARIOUS_FUNCTION 0x02 +#define CFG_INPUT_SOURCE_PRIORITY BIT(2) #define CFG_FLOAT_VOLTAGE 0x03 #define CFG_FLOAT_VOLTAGE_THRESHOLD_MASK 0xc0 +#define CFG_FLOAT_VOLTAGE_MASK 0x3F #define CFG_FLOAT_VOLTAGE_THRESHOLD_SHIFT 6 +#define CFG_CHARGE_CONTROL 0x04 +#define CFG_AUTOMATIC_RECHARGE_DISABLE BIT(7) #define CFG_STAT 0x05 #define CFG_STAT_DISABLED BIT(5) #define CFG_STAT_ACTIVE_HIGH BIT(7) #define CFG_PIN 0x06 #define CFG_PIN_EN_CTRL_MASK 0x60 +#define CFG_PIN_USB_MODE_CTRL BIT(4) #define CFG_PIN_EN_CTRL_ACTIVE_HIGH 0x40 #define CFG_PIN_EN_CTRL_ACTIVE_LOW 0x60 #define CFG_PIN_EN_APSD_IRQ BIT(1) @@ -85,8 +92,12 @@ #define CMD_A 0x30 #define CMD_A_CHG_ENABLED BIT(1) #define CMD_A_SUSPEND_ENABLED BIT(2) +#define CMD_A_OTG_ENABLE BIT(4) #define CMD_A_ALLOW_WRITE BIT(7) #define CMD_B 0x31 +#define CMD_B_POR BIT(7) +#define CMD_B_USB59_MODE BIT(1) +#define CMD_B_HC_MODE BIT(0) #define CMD_C 0x33 /* Interrupt Status registers */ @@ -108,6 +119,7 @@ #define STAT_B 0x3c #define STAT_C 0x3d #define STAT_C_CHG_ENABLED BIT(0) +#define STAT_C_CHG_STATUS BIT(5) #define STAT_C_CHG_MASK 0x06 #define STAT_C_CHG_SHIFT 1 #define STAT_C_CHARGER_ERROR BIT(6) @@ -135,6 +147,11 @@ struct smb347_charger { bool mains_online; bool usb_online; bool charging_enabled; + unsigned int mains_current_limit; + bool usb_hc_mode; + bool usb_otg_enabled; + bool is_fully_charged; + int en_gpio; struct dentry *dentry; const struct smb347_charger_platform_data *pdata; }; @@ -315,9 +332,17 @@ static int smb347_charging_set(struct smb347_charger *smb, bool enable) { int ret = 0; + if (enable && !smb->charging_enabled) + smb->is_fully_charged = false; + if (smb->pdata->enable_control != SMB347_CHG_ENABLE_SW) { - dev_dbg(&smb->client->dev, - "charging enable/disable in SW disabled\n"); + smb->charging_enabled = enable; + + if (smb->en_gpio) + gpio_set_value( + smb->en_gpio, + (smb->pdata->enable_control == + SMB347_CHG_ENABLE_PIN_ACTIVE_LOW) ^ enable); return 0; } @@ -424,9 +449,9 @@ static int smb347_set_current_limits(struct smb347_charger *smb) if (ret < 0) return ret; - if (smb->pdata->mains_current_limit) { + if (smb->mains_current_limit) { val = current_to_hw(icl_tbl, ARRAY_SIZE(icl_tbl), - smb->pdata->mains_current_limit); + smb->mains_current_limit); if (val < 0) return val; @@ -473,6 +498,7 @@ static int 
smb347_set_voltage_limits(struct smb347_charger *smb) val = clamp_val(val, 3500000, 4500000) - 3500000; val /= 20000; + ret &= ~CFG_FLOAT_VOLTAGE_MASK; ret |= val; } @@ -662,169 +688,6 @@ static int smb347_set_writable(struct smb347_charger *smb, bool writable) return smb347_write(smb, CMD_A, ret); } -static int smb347_hw_init(struct smb347_charger *smb) -{ - int ret; - - ret = smb347_set_writable(smb, true); - if (ret < 0) - return ret; - - /* - * Program the platform specific configuration values to the device - * first. - */ - ret = smb347_set_charge_current(smb); - if (ret < 0) - goto fail; - - ret = smb347_set_current_limits(smb); - if (ret < 0) - goto fail; - - ret = smb347_set_voltage_limits(smb); - if (ret < 0) - goto fail; - - ret = smb347_set_temp_limits(smb); - if (ret < 0) - goto fail; - - /* If USB charging is disabled we put the USB in suspend mode */ - if (!smb->pdata->use_usb) { - ret = smb347_read(smb, CMD_A); - if (ret < 0) - goto fail; - - ret |= CMD_A_SUSPEND_ENABLED; - - ret = smb347_write(smb, CMD_A, ret); - if (ret < 0) - goto fail; - } - - ret = smb347_read(smb, CFG_OTHER); - if (ret < 0) - goto fail; - - /* - * If configured by platform data, we enable hardware Auto-OTG - * support for driving VBUS. Otherwise we disable it. - */ - ret &= ~CFG_OTHER_RID_MASK; - if (smb->pdata->use_usb_otg) - ret |= CFG_OTHER_RID_ENABLED_AUTO_OTG; - - ret = smb347_write(smb, CFG_OTHER, ret); - if (ret < 0) - goto fail; - - ret = smb347_read(smb, CFG_PIN); - if (ret < 0) - goto fail; - - /* - * Make the charging functionality controllable by a write to the - * command register unless pin control is specified in the platform - * data. - */ - ret &= ~CFG_PIN_EN_CTRL_MASK; - - switch (smb->pdata->enable_control) { - case SMB347_CHG_ENABLE_SW: - /* Do nothing, 0 means i2c control */ - break; - case SMB347_CHG_ENABLE_PIN_ACTIVE_LOW: - ret |= CFG_PIN_EN_CTRL_ACTIVE_LOW; - break; - case SMB347_CHG_ENABLE_PIN_ACTIVE_HIGH: - ret |= CFG_PIN_EN_CTRL_ACTIVE_HIGH; - break; - } - - /* Disable Automatic Power Source Detection (APSD) interrupt. */ - ret &= ~CFG_PIN_EN_APSD_IRQ; - - ret = smb347_write(smb, CFG_PIN, ret); - if (ret < 0) - goto fail; - - ret = smb347_update_status(smb); - if (ret < 0) - goto fail; - - ret = smb347_update_online(smb); - -fail: - smb347_set_writable(smb, false); - return ret; -} - -static irqreturn_t smb347_interrupt(int irq, void *data) -{ - struct smb347_charger *smb = data; - int stat_c, irqstat_e, irqstat_c; - irqreturn_t ret = IRQ_NONE; - - stat_c = smb347_read(smb, STAT_C); - if (stat_c < 0) { - dev_warn(&smb->client->dev, "reading STAT_C failed\n"); - return IRQ_NONE; - } - - irqstat_c = smb347_read(smb, IRQSTAT_C); - if (irqstat_c < 0) { - dev_warn(&smb->client->dev, "reading IRQSTAT_C failed\n"); - return IRQ_NONE; - } - - irqstat_e = smb347_read(smb, IRQSTAT_E); - if (irqstat_e < 0) { - dev_warn(&smb->client->dev, "reading IRQSTAT_E failed\n"); - return IRQ_NONE; - } - - /* - * If we get charger error we report the error back to user and - * disable charging. - */ - if (stat_c & STAT_C_CHARGER_ERROR) { - dev_err(&smb->client->dev, - "error in charger, disabling charging\n"); - - smb347_charging_disable(smb); - power_supply_changed(&smb->battery); - - ret = IRQ_HANDLED; - } - - /* - * If we reached the termination current the battery is charged and - * we can update the status now. Charging is automatically - * disabled by the hardware. 
- */ - if (irqstat_c & (IRQSTAT_C_TERMINATION_IRQ | IRQSTAT_C_TAPER_IRQ)) { - if (irqstat_c & IRQSTAT_C_TERMINATION_STAT) - power_supply_changed(&smb->battery); - ret = IRQ_HANDLED; - } - - /* - * If we got an under voltage interrupt it means that AC/USB input - * was connected or disconnected. - */ - if (irqstat_e & (IRQSTAT_E_USBIN_UV_IRQ | IRQSTAT_E_DCIN_UV_IRQ)) { - if (smb347_update_status(smb) > 0) { - smb347_update_online(smb); - power_supply_changed(&smb->mains); - power_supply_changed(&smb->usb); - } - ret = IRQ_HANDLED; - } - - return ret; -} - static int smb347_irq_set(struct smb347_charger *smb, bool enable) { int ret; @@ -889,6 +752,73 @@ static inline int smb347_irq_disable(struct smb347_charger *smb) return smb347_irq_set(smb, false); } +static irqreturn_t smb347_interrupt(int irq, void *data) +{ + struct smb347_charger *smb = data; + int stat_c, t; + u8 irqstat[6]; + irqreturn_t ret = IRQ_NONE; + + t = i2c_smbus_read_i2c_block_data(smb->client, IRQSTAT_A, 6, irqstat); + if (t < 0) { + dev_warn(&smb->client->dev, + "reading IRQSTAT registers failed\n"); + return IRQ_NONE; + } + + stat_c = smb347_read(smb, STAT_C); + if (stat_c < 0) { + dev_warn(&smb->client->dev, "reading STAT_C failed\n"); + return IRQ_NONE; + } + + pr_debug("%s: stat c=%x irq a=%x b=%x c=%x d=%x e=%x f=%x\n", + __func__, stat_c, irqstat[0], irqstat[1], irqstat[2], + irqstat[3], irqstat[4], irqstat[5]); + + /* + * If we get charger error we report the error back to user and + * disable charging. + */ + if (stat_c & STAT_C_CHARGER_ERROR) { + dev_err(&smb->client->dev, + "error in charger, disabling charging\n"); + + smb347_charging_disable(smb); + power_supply_changed(&smb->battery); + + ret = IRQ_HANDLED; + } else if (((stat_c & STAT_C_CHG_STATUS) || + (irqstat[2] & (IRQSTAT_C_TERMINATION_IRQ | + IRQSTAT_C_TERMINATION_STAT))) && + !smb->is_fully_charged) { + dev_info(&smb->client->dev, "charge terminated\n"); + smb->is_fully_charged = true; + smb347_charging_disable(smb); + power_supply_changed(&smb->battery); + ret = IRQ_HANDLED; + } + + if (irqstat[2] & IRQSTAT_C_TAPER_IRQ) + ret = IRQ_HANDLED; + + /* + * If we got an under voltage interrupt it means that AC/USB input + * was disconnected. + */ + if (irqstat[4] & (IRQSTAT_E_USBIN_UV_IRQ | IRQSTAT_E_DCIN_UV_IRQ)) + ret = IRQ_HANDLED; + + if (smb347_update_status(smb) > 0) { + smb347_update_online(smb); + power_supply_changed(&smb->mains); + power_supply_changed(&smb->usb); + ret = IRQ_HANDLED; + } + + return ret; +} + static int smb347_irq_init(struct smb347_charger *smb) { const struct smb347_charger_platform_data *pdata = smb->pdata; @@ -899,42 +829,20 @@ static int smb347_irq_init(struct smb347_charger *smb) goto fail; ret = request_threaded_irq(irq, NULL, smb347_interrupt, - IRQF_TRIGGER_FALLING, smb->client->name, - smb); + pdata->disable_stat_interrupts ? + IRQF_TRIGGER_RISING | IRQF_ONESHOT : + IRQF_TRIGGER_FALLING | IRQF_ONESHOT, + smb->client->name, smb); if (ret < 0) goto fail_gpio; - ret = smb347_set_writable(smb, true); - if (ret < 0) - goto fail_irq; + ret = enable_irq_wake(irq); + if (ret) + pr_err("%s: failed to enable wake on irq %d\n", __func__, irq); - /* - * Configure the STAT output to be suitable for interrupts: disable - * all other output (except interrupts) and make it active low. 
- */ - ret = smb347_read(smb, CFG_STAT); - if (ret < 0) - goto fail_readonly; - - ret &= ~CFG_STAT_ACTIVE_HIGH; - ret |= CFG_STAT_DISABLED; - - ret = smb347_write(smb, CFG_STAT, ret); - if (ret < 0) - goto fail_readonly; - - ret = smb347_irq_enable(smb); - if (ret < 0) - goto fail_readonly; - - smb347_set_writable(smb, false); smb->client->irq = irq; return 0; -fail_readonly: - smb347_set_writable(smb, false); -fail_irq: - free_irq(irq, smb); fail_gpio: gpio_free(pdata->irq_gpio); fail: @@ -942,6 +850,146 @@ fail: return ret; } +static int smb347_hw_init(struct smb347_charger *smb) +{ + int ret; + + ret = smb347_set_writable(smb, true); + if (ret < 0) + return ret; + + /* + * Program the platform specific configuration values to the device + * first. + */ + ret = smb347_set_charge_current(smb); + if (ret < 0) + goto fail; + + ret = smb347_set_current_limits(smb); + if (ret < 0) + goto fail; + + ret = smb347_set_voltage_limits(smb); + if (ret < 0) + goto fail; + +// HACK for Manta pre-alpha 0.2, TH_BATTERY not connected properly +#if 0 // HACK + ret = smb347_set_temp_limits(smb); + if (ret < 0) + goto fail; +#endif // HACK + + /* If USB charging is disabled we put the USB in suspend mode */ + if (!smb->pdata->use_usb) { + ret = smb347_read(smb, CMD_A); + if (ret < 0) + goto fail; + + ret |= CMD_A_SUSPEND_ENABLED; + + ret = smb347_write(smb, CMD_A, ret); + if (ret < 0) + goto fail; + } + + ret = smb347_read(smb, CFG_OTHER); + if (ret < 0) + goto fail; + + /* + * If configured by platform data, we enable hardware Auto-OTG + * support for driving VBUS. Otherwise we disable it. + */ + ret &= ~CFG_OTHER_RID_MASK; + if (smb->pdata->use_usb_otg) + ret |= CFG_OTHER_RID_ENABLED_AUTO_OTG; + + ret = smb347_write(smb, CFG_OTHER, ret); + if (ret < 0) + goto fail; + + /* If configured by platform data, disable AUTOMATIC RECHARGE */ + if (smb->pdata->disable_automatic_recharge) { + ret = smb347_read(smb, CFG_CHARGE_CONTROL); + if (ret < 0) + goto fail; + + ret |= CFG_AUTOMATIC_RECHARGE_DISABLE; + + ret = smb347_write(smb, CFG_CHARGE_CONTROL, ret); + if (ret < 0) + goto fail; + } + + ret = smb347_read(smb, CFG_PIN); + if (ret < 0) + goto fail; + + /* + * Make the charging functionality controllable by a write to the + * command register unless pin control is specified in the platform + * data. + */ + ret &= ~(CFG_PIN_EN_CTRL_MASK | CFG_PIN_USB_MODE_CTRL); + + switch (smb->pdata->enable_control) { + case SMB347_CHG_ENABLE_SW: + /* Do nothing, 0 means i2c control */ + break; + case SMB347_CHG_ENABLE_PIN_ACTIVE_LOW: + ret |= CFG_PIN_EN_CTRL_ACTIVE_LOW; + break; + case SMB347_CHG_ENABLE_PIN_ACTIVE_HIGH: + ret |= CFG_PIN_EN_CTRL_ACTIVE_HIGH; + break; + } + + if (smb->pdata->usb_mode_pin_ctrl) + ret |= CFG_PIN_USB_MODE_CTRL; + + /* Disable Automatic Power Source Detection (APSD) interrupt. */ + ret &= ~CFG_PIN_EN_APSD_IRQ; + + ret = smb347_write(smb, CFG_PIN, ret); + if (ret < 0) + goto fail; + + ret = smb347_update_status(smb); + if (ret < 0) + goto fail; + + ret = smb347_update_online(smb); + + if ((smb->pdata->irq_gpio >= 0) && + !smb->pdata->disable_stat_interrupts) { + /* + * Configure the STAT output to be suitable for interrupts: + * disable all other output (except interrupts) and make it + * active low. 
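+		 * Active-low output matches the IRQF_TRIGGER_FALLING
+		 * trigger requested in smb347_irq_init() when STAT
+		 * interrupts are in use.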
+ */ + ret = smb347_read(smb, CFG_STAT); + if (ret < 0) + goto fail; + + ret &= ~CFG_STAT_ACTIVE_HIGH; + ret |= CFG_STAT_DISABLED; + + ret = smb347_write(smb, CFG_STAT, ret); + if (ret < 0) + goto fail; + + ret = smb347_irq_enable(smb); + if (ret < 0) + goto fail; + } + +fail: + smb347_set_writable(smb, false); + return ret; +} + static int smb347_mains_get_property(struct power_supply *psy, enum power_supply_property prop, union power_supply_propval *val) @@ -949,15 +997,83 @@ static int smb347_mains_get_property(struct power_supply *psy, struct smb347_charger *smb = container_of(psy, struct smb347_charger, mains); - if (prop == POWER_SUPPLY_PROP_ONLINE) { + switch (prop) { + case POWER_SUPPLY_PROP_ONLINE: val->intval = smb->mains_online; return 0; + + case POWER_SUPPLY_PROP_CURRENT_MAX: + val->intval = smb->mains_current_limit; + return 0; + + default: + return -EINVAL; } return -EINVAL; } +static int smb347_mains_set_property(struct power_supply *psy, + enum power_supply_property prop, + const union power_supply_propval *val) +{ + struct smb347_charger *smb = + container_of(psy, struct smb347_charger, mains); + int ret; + bool oldval; + + switch (prop) { + case POWER_SUPPLY_PROP_ONLINE: + oldval = smb->mains_online; + + smb->mains_online = val->intval; + + smb347_set_writable(smb, true); + + ret = smb347_read(smb, CMD_A); + if (ret < 0) + return -EINVAL; + + ret &= ~CMD_A_SUSPEND_ENABLED; + if (val->intval) + ret |= CMD_A_SUSPEND_ENABLED; + + ret = smb347_write(smb, CMD_A, ret); + + smb347_hw_init(smb); + + smb347_set_writable(smb, false); + + if (smb->mains_online != oldval) + power_supply_changed(psy); + return 0; + case POWER_SUPPLY_PROP_CURRENT_MAX: + smb->mains_current_limit = val->intval; + smb347_hw_init(smb); + return 0; + + default: + return -EINVAL; + } + + return -EINVAL; +} + +static int smb347_mains_property_is_writeable(struct power_supply *psy, + enum power_supply_property prop) +{ + switch (prop) { + case POWER_SUPPLY_PROP_CURRENT_MAX: + return 1; + default: + break; + } + + return 0; +} + static enum power_supply_property smb347_mains_properties[] = { POWER_SUPPLY_PROP_ONLINE, + POWER_SUPPLY_PROP_CURRENT_MAX, }; static int smb347_usb_get_property(struct power_supply *psy, @@ -967,15 +1083,94 @@ static int smb347_usb_get_property(struct power_supply *psy, struct smb347_charger *smb = container_of(psy, struct smb347_charger, usb); - if (prop == POWER_SUPPLY_PROP_ONLINE) { + switch (prop) { + case POWER_SUPPLY_PROP_ONLINE: val->intval = smb->usb_online; return 0; + + case POWER_SUPPLY_PROP_USB_HC: + val->intval = smb->usb_hc_mode; + return 0; + + case POWER_SUPPLY_PROP_USB_OTG: + val->intval = smb->usb_otg_enabled; + return 0; + + default: + break; } return -EINVAL; } +static int smb347_usb_set_property(struct power_supply *psy, + enum power_supply_property prop, + const union power_supply_propval *val) +{ + int ret = -EINVAL; + struct smb347_charger *smb = + container_of(psy, struct smb347_charger, usb); + bool oldval; + + switch (prop) { + case POWER_SUPPLY_PROP_ONLINE: + oldval = smb->usb_online; + smb->usb_online = val->intval; + + if (smb->usb_online != oldval) + power_supply_changed(psy); + ret = 0; + break; + case POWER_SUPPLY_PROP_USB_HC: + smb347_set_writable(smb, true); + ret = smb347_write(smb, CMD_B, val->intval ? 
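+				/* assumed: HC = high-current input limit,
+				 * USB59 = the standard USB 500/900 mA modes */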
+ CMD_B_HC_MODE : CMD_B_USB59_MODE); + smb347_set_writable(smb, false); + smb->usb_hc_mode = val->intval; + break; + + case POWER_SUPPLY_PROP_USB_OTG: + ret = smb347_read(smb, CMD_A); + + if (ret < 0) + return ret; + + if (val->intval) + ret |= CMD_A_OTG_ENABLE; + else + ret &= ~CMD_A_OTG_ENABLE; + + ret = smb347_write(smb, CMD_A, ret); + + if (ret >= 0) + smb->usb_otg_enabled = val->intval; + + break; + + default: + break; + } + + return ret; +} + +static int smb347_usb_property_is_writeable(struct power_supply *psy, + enum power_supply_property prop) +{ + switch (prop) { + case POWER_SUPPLY_PROP_USB_HC: + case POWER_SUPPLY_PROP_USB_OTG: + return 1; + default: + break; + } + + return 0; +} + static enum power_supply_property smb347_usb_properties[] = { POWER_SUPPLY_PROP_ONLINE, + POWER_SUPPLY_PROP_USB_HC, + POWER_SUPPLY_PROP_USB_OTG, }; static int smb347_battery_get_property(struct power_supply *psy, @@ -991,16 +1186,25 @@ static int smb347_battery_get_property(struct power_supply *psy, if (ret < 0) return ret; + if (ret > 0) { + smb347_update_online(smb); + power_supply_changed(&smb->mains); + power_supply_changed(&smb->usb); + } + switch (prop) { case POWER_SUPPLY_PROP_STATUS: if (!smb347_is_online(smb)) { + smb->is_fully_charged = false; val->intval = POWER_SUPPLY_STATUS_DISCHARGING; break; } if (smb347_charging_status(smb)) val->intval = POWER_SUPPLY_STATUS_CHARGING; else - val->intval = POWER_SUPPLY_STATUS_FULL; + val->intval = smb->is_fully_charged ? + POWER_SUPPLY_STATUS_FULL : + POWER_SUPPLY_STATUS_NOT_CHARGING; break; case POWER_SUPPLY_PROP_CHARGE_TYPE: @@ -1078,6 +1282,10 @@ static int smb347_battery_get_property(struct power_supply *psy, val->intval = pdata->battery_info.charge_full_design; break; + case POWER_SUPPLY_PROP_CHARGE_ENABLED: + val->intval = smb->charging_enabled; + break; + case POWER_SUPPLY_PROP_MODEL_NAME: val->strval = pdata->battery_info.name; break; @@ -1089,6 +1297,39 @@ static int smb347_battery_get_property(struct power_supply *psy, return 0; } +static int smb347_battery_set_property(struct power_supply *psy, + enum power_supply_property prop, + const union power_supply_propval *val) +{ + int ret = -EINVAL; + struct smb347_charger *smb = + container_of(psy, struct smb347_charger, battery); + + switch (prop) { + case POWER_SUPPLY_PROP_CHARGE_ENABLED: + ret = smb347_charging_set(smb, val->intval); + break; + + default: + break; + } + + return ret; +} + +static int smb347_battery_property_is_writeable(struct power_supply *psy, + enum power_supply_property prop) +{ + switch (prop) { + case POWER_SUPPLY_PROP_CHARGE_ENABLED: + return 1; + default: + break; + } + + return 0; +} + static enum power_supply_property smb347_battery_properties[] = { POWER_SUPPLY_PROP_STATUS, POWER_SUPPLY_PROP_CHARGE_TYPE, @@ -1098,6 +1339,7 @@ static enum power_supply_property smb347_battery_properties[] = { POWER_SUPPLY_PROP_VOLTAGE_NOW, POWER_SUPPLY_PROP_CURRENT_NOW, POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN, + POWER_SUPPLY_PROP_CHARGE_ENABLED, POWER_SUPPLY_PROP_MODEL_NAME, }; @@ -1181,6 +1423,33 @@ static int smb347_probe(struct i2c_client *client, smb->client = client; smb->pdata = pdata; + smb->mains_current_limit = smb->pdata->mains_current_limit; + + if (pdata->en_gpio) { + ret = gpio_request_one( + pdata->en_gpio, + smb->pdata->enable_control == + SMB347_CHG_ENABLE_PIN_ACTIVE_LOW ? 
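+			/* request the EN pin at its inactive level so the
+			 * charger starts with charging disabled */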
+ GPIOF_OUT_INIT_HIGH : GPIOF_OUT_INIT_LOW, + smb->client->name); + if (ret < 0) + dev_warn(dev, "failed to claim EN GPIO: %d\n", ret); + else + smb->en_gpio = pdata->en_gpio; + } + + ret = smb347_write(smb, CMD_B, CMD_B_POR); + if (ret < 0) + return ret; + + msleep(20); + + ret = smb347_read(smb, CMD_B); + if (ret < 0) { + dev_err(dev, "failed read after reset\n"); + return ret; + } + ret = smb347_hw_init(smb); if (ret < 0) return ret; @@ -1188,6 +1457,8 @@ static int smb347_probe(struct i2c_client *client, smb->mains.name = "smb347-mains"; smb->mains.type = POWER_SUPPLY_TYPE_MAINS; smb->mains.get_property = smb347_mains_get_property; + smb->mains.set_property = smb347_mains_set_property; + smb->mains.property_is_writeable = smb347_mains_property_is_writeable; smb->mains.properties = smb347_mains_properties; smb->mains.num_properties = ARRAY_SIZE(smb347_mains_properties); smb->mains.supplied_to = battery; @@ -1196,6 +1467,8 @@ static int smb347_probe(struct i2c_client *client, smb->usb.name = "smb347-usb"; smb->usb.type = POWER_SUPPLY_TYPE_USB; smb->usb.get_property = smb347_usb_get_property; + smb->usb.set_property = smb347_usb_set_property; + smb->usb.property_is_writeable = smb347_usb_property_is_writeable; smb->usb.properties = smb347_usb_properties; smb->usb.num_properties = ARRAY_SIZE(smb347_usb_properties); smb->usb.supplied_to = battery; @@ -1204,9 +1477,17 @@ static int smb347_probe(struct i2c_client *client, smb->battery.name = "smb347-battery"; smb->battery.type = POWER_SUPPLY_TYPE_BATTERY; smb->battery.get_property = smb347_battery_get_property; + smb->battery.set_property = smb347_battery_set_property; + smb->battery.property_is_writeable = smb347_battery_property_is_writeable; smb->battery.properties = smb347_battery_properties; smb->battery.num_properties = ARRAY_SIZE(smb347_battery_properties); + if (smb->pdata->supplied_to) { + smb->battery.supplied_to = smb->pdata->supplied_to; + smb->battery.num_supplicants = smb->pdata->num_supplicants; + smb->battery.external_power_changed = power_supply_changed; + } + ret = power_supply_register(dev, &smb->mains); if (ret < 0) return ret; @@ -1250,6 +1531,7 @@ static int smb347_remove(struct i2c_client *client) if (client->irq) { smb347_irq_disable(smb); + disable_irq_wake(client->irq); free_irq(client->irq, smb); gpio_free(smb->pdata->irq_gpio); } @@ -1260,6 +1542,29 @@ static int smb347_remove(struct i2c_client *client) return 0; } +static int smb347_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + + if (client->irq) + disable_irq(client->irq); + return 0; +} + +static int smb347_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + + if (client->irq) + enable_irq(client->irq); + return 0; +} + +static const struct dev_pm_ops smb347_pm_ops = { + .suspend = smb347_suspend, + .resume = smb347_resume, +}; + static const struct i2c_device_id smb347_id[] = { { "smb347", 0 }, { } @@ -1269,6 +1574,7 @@ MODULE_DEVICE_TABLE(i2c, smb347_id); static struct i2c_driver smb347_driver = { .driver = { .name = "smb347", + .pm = &smb347_pm_ops, }, .probe = smb347_probe, .remove = __devexit_p(smb347_remove), diff --git a/drivers/staging/android/Kconfig b/drivers/staging/android/Kconfig index de7338c102b..ddff67a2f74 100644 --- a/drivers/staging/android/Kconfig +++ b/drivers/staging/android/Kconfig @@ -21,6 +21,9 @@ config ASHMEM POSIX SHM but with different behavior and sporting a simpler file-based API. 
+ It is, in theory, a good memory allocator for low-memory devices, + because it can discard shared memory units when under memory pressure. + config ANDROID_LOGGER tristate "Android log driver" default n @@ -119,15 +122,6 @@ config EXCLUDE_LRU_LIVING_IN_CMA source "drivers/staging/android/switch/Kconfig" -config ANDROID_INTF_ALARM_DEV - bool "Android alarm driver" - depends on RTC_CLASS - default n - help - Provides non-wakeup and rtc backed wakeup alarms based on rtc or - elapsed realtime, and a non-wakeup alarm on the monotonic clock. - Also exports the alarm interface to user-space. - config RUNTIME_COMPCACHE bool "Android rtcc v3" depends on ZRAM && ANDROID diff --git a/drivers/staging/android/Makefile b/drivers/staging/android/Makefile index 690bdabd0ea..4c569e4f983 100644 --- a/drivers/staging/android/Makefile +++ b/drivers/staging/android/Makefile @@ -1,3 +1,5 @@ +ccflags-y += -I$(src) # needed for trace events + obj-$(CONFIG_ANDROID_BINDER_IPC) += binder.o obj-$(CONFIG_ASHMEM) += ashmem.o obj-$(CONFIG_ANDROID_LOGGER) += logger.o @@ -7,7 +9,6 @@ obj-$(CONFIG_ANDROID_TIMED_OUTPUT) += timed_output.o obj-$(CONFIG_ANDROID_TIMED_GPIO) += timed_gpio.o obj-$(CONFIG_ANDROID_LOW_MEMORY_KILLER) += lowmemorykiller.o obj-$(CONFIG_ANDROID_SWITCH) += switch/ -obj-$(CONFIG_ANDROID_INTF_ALARM_DEV) += alarm-dev.o obj-$(CONFIG_PERSISTENT_TRACER) += trace_persistent.o obj-$(CONFIG_RUNTIME_COMPCACHE) += rtcc.o diff --git a/drivers/staging/android/alarm-dev.c b/drivers/staging/android/alarm-dev.c deleted file mode 100644 index e001fe586a8..00000000000 --- a/drivers/staging/android/alarm-dev.c +++ /dev/null @@ -1,373 +0,0 @@ -/* drivers/rtc/alarm-dev.c - * - * Copyright (C) 2007-2009 Google, Inc. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "android_alarm.h" - -#define ANDROID_ALARM_PRINT_INFO (1U << 0) -#define ANDROID_ALARM_PRINT_IO (1U << 1) -#define ANDROID_ALARM_PRINT_INT (1U << 2) - -static int debug_mask = ANDROID_ALARM_PRINT_INFO; -module_param_named(debug_mask, debug_mask, int, S_IRUGO | S_IWUSR | S_IWGRP); - -#define pr_alarm(debug_level_mask, args...) 
\ - do { \ - if (debug_mask & ANDROID_ALARM_PRINT_##debug_level_mask) { \ - pr_info(args); \ - } \ - } while (0) - -#define ANDROID_ALARM_WAKEUP_MASK ( \ - ANDROID_ALARM_RTC_WAKEUP_MASK | \ - ANDROID_ALARM_ELAPSED_REALTIME_WAKEUP_MASK) - -/* support old usespace code */ -#define ANDROID_ALARM_SET_OLD _IOW('a', 2, time_t) /* set alarm */ -#define ANDROID_ALARM_SET_AND_WAIT_OLD _IOW('a', 3, time_t) - -static int alarm_opened; -static DEFINE_SPINLOCK(alarm_slock); -static struct wake_lock alarm_wake_lock; -static DECLARE_WAIT_QUEUE_HEAD(alarm_wait_queue); -static uint32_t alarm_pending; -static uint32_t alarm_enabled; -static uint32_t wait_pending; - -struct devalarm { - union { - struct hrtimer hrt; - struct alarm alrm; - } u; - enum android_alarm_type type; -}; - -static struct devalarm alarms[ANDROID_ALARM_TYPE_COUNT]; - - -static int is_wakeup(enum android_alarm_type type) -{ - if (type == ANDROID_ALARM_RTC_WAKEUP || - type == ANDROID_ALARM_ELAPSED_REALTIME_WAKEUP) - return 1; - return 0; -} - - -static void devalarm_start(struct devalarm *alrm, ktime_t exp) -{ - if (is_wakeup(alrm->type)) - alarm_start(&alrm->u.alrm, exp); - else - hrtimer_start(&alrm->u.hrt, exp, HRTIMER_MODE_ABS); -} - - -static int devalarm_try_to_cancel(struct devalarm *alrm) -{ - int ret; - if (is_wakeup(alrm->type)) - ret = alarm_try_to_cancel(&alrm->u.alrm); - else - ret = hrtimer_try_to_cancel(&alrm->u.hrt); - return ret; -} - -static void devalarm_cancel(struct devalarm *alrm) -{ - if (is_wakeup(alrm->type)) - alarm_cancel(&alrm->u.alrm); - else - hrtimer_cancel(&alrm->u.hrt); -} - - -static long alarm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - int rv = 0; - unsigned long flags; - struct timespec new_alarm_time; - struct timespec new_rtc_time; - struct timespec tmp_time; - struct rtc_time new_rtc_tm; - struct rtc_device *rtc_dev; - enum android_alarm_type alarm_type = ANDROID_ALARM_IOCTL_TO_TYPE(cmd); - uint32_t alarm_type_mask = 1U << alarm_type; - - if (alarm_type >= ANDROID_ALARM_TYPE_COUNT) - return -EINVAL; - - if (ANDROID_ALARM_BASE_CMD(cmd) != ANDROID_ALARM_GET_TIME(0)) { - if ((file->f_flags & O_ACCMODE) == O_RDONLY) - return -EPERM; - if (file->private_data == NULL && - cmd != ANDROID_ALARM_SET_RTC) { - spin_lock_irqsave(&alarm_slock, flags); - if (alarm_opened) { - spin_unlock_irqrestore(&alarm_slock, flags); - return -EBUSY; - } - alarm_opened = 1; - file->private_data = (void *)1; - spin_unlock_irqrestore(&alarm_slock, flags); - } - } - - switch (ANDROID_ALARM_BASE_CMD(cmd)) { - case ANDROID_ALARM_CLEAR(0): - spin_lock_irqsave(&alarm_slock, flags); - pr_alarm(IO, "alarm %d clear\n", alarm_type); - devalarm_try_to_cancel(&alarms[alarm_type]); - if (alarm_pending) { - alarm_pending &= ~alarm_type_mask; - if (!alarm_pending && !wait_pending) - wake_unlock(&alarm_wake_lock); - } - alarm_enabled &= ~alarm_type_mask; - spin_unlock_irqrestore(&alarm_slock, flags); - break; - - case ANDROID_ALARM_SET_OLD: - case ANDROID_ALARM_SET_AND_WAIT_OLD: - if (get_user(new_alarm_time.tv_sec, (int __user *)arg)) { - rv = -EFAULT; - goto err1; - } - new_alarm_time.tv_nsec = 0; - goto from_old_alarm_set; - - case ANDROID_ALARM_SET_AND_WAIT(0): - case ANDROID_ALARM_SET(0): - if (copy_from_user(&new_alarm_time, (void __user *)arg, - sizeof(new_alarm_time))) { - rv = -EFAULT; - goto err1; - } -from_old_alarm_set: - spin_lock_irqsave(&alarm_slock, flags); - pr_alarm(IO, "alarm %d set %ld.%09ld\n", alarm_type, - new_alarm_time.tv_sec, new_alarm_time.tv_nsec); - alarm_enabled |= alarm_type_mask; - 
devalarm_start(&alarms[alarm_type], - timespec_to_ktime(new_alarm_time)); - spin_unlock_irqrestore(&alarm_slock, flags); - if (ANDROID_ALARM_BASE_CMD(cmd) != ANDROID_ALARM_SET_AND_WAIT(0) - && cmd != ANDROID_ALARM_SET_AND_WAIT_OLD) - break; - /* fall though */ - case ANDROID_ALARM_WAIT: - spin_lock_irqsave(&alarm_slock, flags); - pr_alarm(IO, "alarm wait\n"); - if (!alarm_pending && wait_pending) { - wake_unlock(&alarm_wake_lock); - wait_pending = 0; - } - spin_unlock_irqrestore(&alarm_slock, flags); - rv = wait_event_interruptible(alarm_wait_queue, alarm_pending); - if (rv) - goto err1; - spin_lock_irqsave(&alarm_slock, flags); - rv = alarm_pending; - wait_pending = 1; - alarm_pending = 0; - spin_unlock_irqrestore(&alarm_slock, flags); - break; - case ANDROID_ALARM_SET_RTC: - if (copy_from_user(&new_rtc_time, (void __user *)arg, - sizeof(new_rtc_time))) { - rv = -EFAULT; - goto err1; - } - rtc_time_to_tm(new_rtc_time.tv_sec, &new_rtc_tm); - rtc_dev = alarmtimer_get_rtcdev(); - rv = do_settimeofday(&new_rtc_time); - if (rv < 0) - goto err1; - if (rtc_dev) - rv = rtc_set_time(rtc_dev, &new_rtc_tm); - spin_lock_irqsave(&alarm_slock, flags); - alarm_pending |= ANDROID_ALARM_TIME_CHANGE_MASK; - wake_up(&alarm_wait_queue); - spin_unlock_irqrestore(&alarm_slock, flags); - if (rv < 0) - goto err1; - break; - case ANDROID_ALARM_GET_TIME(0): - switch (alarm_type) { - case ANDROID_ALARM_RTC_WAKEUP: - case ANDROID_ALARM_RTC: - getnstimeofday(&tmp_time); - break; - case ANDROID_ALARM_ELAPSED_REALTIME_WAKEUP: - case ANDROID_ALARM_ELAPSED_REALTIME: - get_monotonic_boottime(&tmp_time); - break; - case ANDROID_ALARM_TYPE_COUNT: - case ANDROID_ALARM_SYSTEMTIME: - ktime_get_ts(&tmp_time); - break; - } - if (copy_to_user((void __user *)arg, &tmp_time, - sizeof(tmp_time))) { - rv = -EFAULT; - goto err1; - } - break; - - default: - rv = -EINVAL; - goto err1; - } -err1: - return rv; -} - -static int alarm_open(struct inode *inode, struct file *file) -{ - file->private_data = NULL; - return 0; -} - -static int alarm_release(struct inode *inode, struct file *file) -{ - int i; - unsigned long flags; - - spin_lock_irqsave(&alarm_slock, flags); - if (file->private_data != 0) { - for (i = 0; i < ANDROID_ALARM_TYPE_COUNT; i++) { - uint32_t alarm_type_mask = 1U << i; - if (alarm_enabled & alarm_type_mask) { - pr_alarm(INFO, "alarm_release: clear alarm, " - "pending %d\n", - !!(alarm_pending & alarm_type_mask)); - alarm_enabled &= ~alarm_type_mask; - } - spin_unlock_irqrestore(&alarm_slock, flags); - devalarm_cancel(&alarms[i]); - spin_lock_irqsave(&alarm_slock, flags); - } - if (alarm_pending | wait_pending) { - if (alarm_pending) - pr_alarm(INFO, "alarm_release: clear " - "pending alarms %x\n", alarm_pending); - wake_unlock(&alarm_wake_lock); - wait_pending = 0; - alarm_pending = 0; - } - alarm_opened = 0; - } - spin_unlock_irqrestore(&alarm_slock, flags); - return 0; -} - -static void devalarm_triggered(struct devalarm *alarm) -{ - unsigned long flags; - uint32_t alarm_type_mask = 1U << alarm->type; - - pr_alarm(INT, "devalarm_triggered type %d\n", alarm->type); - spin_lock_irqsave(&alarm_slock, flags); - if (alarm_enabled & alarm_type_mask) { - wake_lock_timeout(&alarm_wake_lock, 5 * HZ); - alarm_enabled &= ~alarm_type_mask; - alarm_pending |= alarm_type_mask; - wake_up(&alarm_wait_queue); - } - spin_unlock_irqrestore(&alarm_slock, flags); -} - - -static enum hrtimer_restart devalarm_hrthandler(struct hrtimer *hrt) -{ - struct devalarm *devalrm = container_of(hrt, struct devalarm, u.hrt); - - 
devalarm_triggered(devalrm); - return HRTIMER_NORESTART; -} - -static enum alarmtimer_restart devalarm_alarmhandler(struct alarm *alrm, - ktime_t now) -{ - struct devalarm *devalrm = container_of(alrm, struct devalarm, u.alrm); - - devalarm_triggered(devalrm); - return ALARMTIMER_NORESTART; -} - - -static const struct file_operations alarm_fops = { - .owner = THIS_MODULE, - .unlocked_ioctl = alarm_ioctl, - .open = alarm_open, - .release = alarm_release, -}; - -static struct miscdevice alarm_device = { - .minor = MISC_DYNAMIC_MINOR, - .name = "alarm", - .fops = &alarm_fops, -}; - -static int __init alarm_dev_init(void) -{ - int err; - int i; - - err = misc_register(&alarm_device); - if (err) - return err; - - alarm_init(&alarms[ANDROID_ALARM_RTC_WAKEUP].u.alrm, - ALARM_REALTIME, devalarm_alarmhandler); - hrtimer_init(&alarms[ANDROID_ALARM_RTC].u.hrt, - CLOCK_REALTIME, HRTIMER_MODE_ABS); - alarm_init(&alarms[ANDROID_ALARM_ELAPSED_REALTIME_WAKEUP].u.alrm, - ALARM_BOOTTIME, devalarm_alarmhandler); - hrtimer_init(&alarms[ANDROID_ALARM_ELAPSED_REALTIME].u.hrt, - CLOCK_BOOTTIME, HRTIMER_MODE_ABS); - hrtimer_init(&alarms[ANDROID_ALARM_SYSTEMTIME].u.hrt, - CLOCK_MONOTONIC, HRTIMER_MODE_ABS); - - for (i = 0; i < ANDROID_ALARM_TYPE_COUNT; i++) { - alarms[i].type = i; - if (!is_wakeup(i)) - alarms[i].u.hrt.function = devalarm_hrthandler; - } - - wake_lock_init(&alarm_wake_lock, WAKE_LOCK_SUSPEND, "alarm"); - - return 0; -} - -static void __exit alarm_dev_exit(void) -{ - misc_deregister(&alarm_device); - wake_lock_destroy(&alarm_wake_lock); -} - -module_init(alarm_dev_init); -module_exit(alarm_dev_exit); - diff --git a/drivers/staging/android/android_alarm.h b/drivers/staging/android/android_alarm.h deleted file mode 100644 index d0cafd63719..00000000000 --- a/drivers/staging/android/android_alarm.h +++ /dev/null @@ -1,62 +0,0 @@ -/* include/linux/android_alarm.h - * - * Copyright (C) 2006-2007 Google, Inc. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - */ - -#ifndef _LINUX_ANDROID_ALARM_H -#define _LINUX_ANDROID_ALARM_H - -#include -#include - -enum android_alarm_type { - /* return code bit numbers or set alarm arg */ - ANDROID_ALARM_RTC_WAKEUP, - ANDROID_ALARM_RTC, - ANDROID_ALARM_ELAPSED_REALTIME_WAKEUP, - ANDROID_ALARM_ELAPSED_REALTIME, - ANDROID_ALARM_SYSTEMTIME, - - ANDROID_ALARM_TYPE_COUNT, - - /* return code bit numbers */ - /* ANDROID_ALARM_TIME_CHANGE = 16 */ -}; - -enum android_alarm_return_flags { - ANDROID_ALARM_RTC_WAKEUP_MASK = 1U << ANDROID_ALARM_RTC_WAKEUP, - ANDROID_ALARM_RTC_MASK = 1U << ANDROID_ALARM_RTC, - ANDROID_ALARM_ELAPSED_REALTIME_WAKEUP_MASK = - 1U << ANDROID_ALARM_ELAPSED_REALTIME_WAKEUP, - ANDROID_ALARM_ELAPSED_REALTIME_MASK = - 1U << ANDROID_ALARM_ELAPSED_REALTIME, - ANDROID_ALARM_SYSTEMTIME_MASK = 1U << ANDROID_ALARM_SYSTEMTIME, - ANDROID_ALARM_TIME_CHANGE_MASK = 1U << 16 -}; - -/* Disable alarm */ -#define ANDROID_ALARM_CLEAR(type) _IO('a', 0 | ((type) << 4)) - -/* Ack last alarm and wait for next */ -#define ANDROID_ALARM_WAIT _IO('a', 1) - -#define ALARM_IOW(c, type, size) _IOW('a', (c) | ((type) << 4), size) -/* Set alarm */ -#define ANDROID_ALARM_SET(type) ALARM_IOW(2, type, struct timespec) -#define ANDROID_ALARM_SET_AND_WAIT(type) ALARM_IOW(3, type, struct timespec) -#define ANDROID_ALARM_GET_TIME(type) ALARM_IOW(4, type, struct timespec) -#define ANDROID_ALARM_SET_RTC _IOW('a', 5, struct timespec) -#define ANDROID_ALARM_BASE_CMD(cmd) (cmd & ~(_IOC(0, 0, 0xf0, 0))) -#define ANDROID_ALARM_IOCTL_TO_TYPE(cmd) (_IOC_NR(cmd) >> 4) - -#endif diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c index d206226e220..5826a5a12b6 100644 --- a/drivers/staging/android/binder.c +++ b/drivers/staging/android/binder.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -36,8 +37,9 @@ #include #include "binder.h" +#include "binder_trace.h" -static DEFINE_MUTEX(binder_lock); +static DEFINE_MUTEX(binder_main_lock); static DEFINE_MUTEX(binder_deferred_lock); static DEFINE_MUTEX(binder_mmap_lock); @@ -500,6 +502,19 @@ out_unlock: return -EBADF; } +static inline void binder_lock(const char *tag) +{ + trace_binder_lock(tag); + mutex_lock(&binder_main_lock); + trace_binder_locked(tag); +} + +static inline void binder_unlock(const char *tag) +{ + trace_binder_unlock(tag); + mutex_unlock(&binder_main_lock); +} + static void binder_set_nice(long nice) { long min_nice; @@ -626,6 +641,8 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, if (end <= start) return 0; + trace_binder_update_page_range(proc, allocate, start, end); + if (vma) mm = NULL; else @@ -1575,6 +1592,9 @@ static void binder_transaction(struct binder_proc *proc, t->code = tr->code; t->flags = tr->flags; t->priority = task_nice(current); + + trace_binder_transaction(reply, t, target_node); + t->buffer = binder_alloc_buf(target_proc, tr->data_size, tr->offsets_size, !reply && (t->flags & TF_ONE_WAY)); if (t->buffer == NULL) { @@ -1585,6 +1605,7 @@ static void binder_transaction(struct binder_proc *proc, t->buffer->debug_id = t->debug_id; t->buffer->transaction = t; t->buffer->target_node = target_node; + trace_binder_transaction_alloc_buf(t->buffer); if (target_node) binder_inc_node(target_node, 1, 0, NULL); @@ -1667,6 +1688,7 @@ static void binder_transaction(struct binder_proc *proc, binder_inc_ref(ref, fp->type == BINDER_TYPE_HANDLE, &thread->todo); + trace_binder_transaction_node_to_ref(t, node, ref); binder_debug(BINDER_DEBUG_TRANSACTION, " node %d u%p -> ref %d desc 
%d\n", node->debug_id, node->ptr, ref->debug_id, @@ -1696,6 +1718,7 @@ static void binder_transaction(struct binder_proc *proc, fp->binder = ref->node->ptr; fp->cookie = ref->node->cookie; binder_inc_node(ref->node, fp->type == BINDER_TYPE_BINDER, 0, NULL); + trace_binder_transaction_ref_to_node(t, ref); binder_debug(BINDER_DEBUG_TRANSACTION, " ref %d desc %d -> node %d u%p\n", ref->debug_id, ref->desc, ref->node->debug_id, @@ -1711,6 +1734,8 @@ static void binder_transaction(struct binder_proc *proc, fp->handle = new_ref->desc; fp->cookie = 0; binder_inc_ref(new_ref, fp->type == BINDER_TYPE_HANDLE, NULL); + trace_binder_transaction_ref_to_ref(t, ref, + new_ref); binder_debug(BINDER_DEBUG_TRANSACTION, " ref %d desc %d -> ref %d desc %d (node %d)\n", ref->debug_id, ref->desc, new_ref->debug_id, @@ -1755,6 +1780,7 @@ static void binder_transaction(struct binder_proc *proc, goto err_get_unused_fd_failed; } task_fd_install(target_proc, target_fd, file); + trace_binder_transaction_fd(t, fp->handle, target_fd); binder_debug(BINDER_DEBUG_TRANSACTION, " fd %ld -> %d\n", fp->handle, target_fd); /* TODO: fput? */ @@ -1804,6 +1830,7 @@ err_binder_new_node_failed: err_bad_object_type: err_bad_offset: err_copy_data_failed: + trace_binder_transaction_failed_buffer_release(t->buffer); binder_transaction_buffer_release(target_proc, t->buffer, offp); t->buffer->transaction = NULL; binder_free_buf(target_proc, t->buffer); @@ -1849,6 +1876,7 @@ int binder_thread_write(struct binder_proc *proc, struct binder_thread *thread, if (get_user(cmd, (uint32_t __user *)ptr)) return -EFAULT; ptr += sizeof(uint32_t); + trace_binder_command(cmd); if (_IOC_NR(cmd) < ARRAY_SIZE(binder_stats.bc)) { binder_stats.bc[_IOC_NR(cmd)]++; proc->stats.bc[_IOC_NR(cmd)]++; @@ -2024,6 +2052,7 @@ int binder_thread_write(struct binder_proc *proc, struct binder_thread *thread, wake_up_interruptible(&thread->proc->wait); } } + trace_binder_transaction_buffer_release(buffer); binder_transaction_buffer_release(proc, buffer, NULL); binder_free_buf(proc, buffer); break; @@ -2233,6 +2262,7 @@ int binder_thread_write(struct binder_proc *proc, struct binder_thread *thread, void binder_stat_br(struct binder_proc *proc, struct binder_thread *thread, uint32_t cmd) { + trace_binder_return(cmd); if (_IOC_NR(cmd) < ARRAY_SIZE(binder_stats.br)) { binder_stats.br[_IOC_NR(cmd)]++; proc->stats.br[_IOC_NR(cmd)]++; @@ -2279,6 +2309,7 @@ retry: if (put_user(thread->return_error2, (uint32_t __user *)ptr)) return -EFAULT; ptr += sizeof(uint32_t); + binder_stat_br(proc, thread, thread->return_error2); if (ptr == end) goto done; thread->return_error2 = BR_OK; @@ -2286,6 +2317,7 @@ retry: if (put_user(thread->return_error, (uint32_t __user *)ptr)) return -EFAULT; ptr += sizeof(uint32_t); + binder_stat_br(proc, thread, thread->return_error); thread->return_error = BR_OK; goto done; } @@ -2294,7 +2326,12 @@ retry: thread->looper |= BINDER_LOOPER_STATE_WAITING; if (wait_for_proc_work) proc->ready_threads++; - mutex_unlock(&binder_lock); + + binder_unlock(__func__); + + trace_binder_wait_for_work(wait_for_proc_work, + !!thread->transaction_stack, + !list_empty(&thread->todo)); if (wait_for_proc_work) { if (!(thread->looper & (BINDER_LOOPER_STATE_REGISTERED | BINDER_LOOPER_STATE_ENTERED))) { @@ -2310,15 +2347,17 @@ retry: if (!binder_has_proc_work(proc, thread)) ret = -EAGAIN; } else - ret = wait_event_interruptible_exclusive(proc->wait, binder_has_proc_work(proc, thread)); + ret = wait_event_freezable_exclusive(proc->wait, binder_has_proc_work(proc, thread)); } else { 
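+		/* Freezable waits let the freezer handle threads parked
+		 * here across suspend/resume without aborting the binder
+		 * call. */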
if (non_block) { if (!binder_has_thread_work(thread)) ret = -EAGAIN; } else - ret = wait_event_interruptible(thread->wait, binder_has_thread_work(thread)); + ret = wait_event_freezable(thread->wait, binder_has_thread_work(thread)); } - mutex_lock(&binder_lock); + + binder_lock(__func__); + if (wait_for_proc_work) proc->ready_threads--; thread->looper &= ~BINDER_LOOPER_STATE_WAITING; @@ -2441,6 +2480,7 @@ retry: if (put_user(death->cookie, (void * __user *)ptr)) return -EFAULT; ptr += sizeof(void *); + binder_stat_br(proc, thread, cmd); binder_debug(BINDER_DEBUG_DEATH_NOTIFICATION, "binder: %d:%d %s %p\n", proc->pid, thread->pid, @@ -2508,6 +2548,7 @@ retry: return -EFAULT; ptr += sizeof(tr); + trace_binder_transaction_received(t); binder_stat_br(proc, thread, cmd); binder_debug(BINDER_DEBUG_TRANSACTION, "binder: %d:%d %s %d %d:%d, cmd %d" @@ -2548,6 +2589,7 @@ done: proc->pid, thread->pid); if (put_user(BR_SPAWN_LOOPER, (uint32_t __user *)buffer)) return -EFAULT; + binder_stat_br(proc, thread, BR_SPAWN_LOOPER); } return 0; } @@ -2684,12 +2726,14 @@ static unsigned int binder_poll(struct file *filp, struct binder_thread *thread = NULL; int wait_for_proc_work; - mutex_lock(&binder_lock); + binder_lock(__func__); + thread = binder_get_thread(proc); wait_for_proc_work = thread->transaction_stack == NULL && list_empty(&thread->todo) && thread->return_error == BR_OK; - mutex_unlock(&binder_lock); + + binder_unlock(__func__); if (wait_for_proc_work) { if (binder_has_proc_work(proc, thread)) @@ -2717,11 +2761,13 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) /*printk(KERN_INFO "binder_ioctl: %d:%d %x %lx\n", proc->pid, current->pid, cmd, arg);*/ + trace_binder_ioctl(cmd, arg); + ret = wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2); if (ret) - return ret; + goto err_unlocked; - mutex_lock(&binder_lock); + binder_lock(__func__); thread = binder_get_thread(proc); if (thread == NULL) { ret = -ENOMEM; @@ -2746,6 +2792,7 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (bwr.write_size > 0) { ret = binder_thread_write(proc, thread, (void __user *)bwr.write_buffer, bwr.write_size, &bwr.write_consumed); + trace_binder_write_done(ret); if (ret < 0) { bwr.read_consumed = 0; if (copy_to_user(ubuf, &bwr, sizeof(bwr))) @@ -2755,6 +2802,7 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) } if (bwr.read_size > 0) { ret = binder_thread_read(proc, thread, (void __user *)bwr.read_buffer, bwr.read_size, &bwr.read_consumed, filp->f_flags & O_NONBLOCK); + trace_binder_read_done(ret); if (!list_empty(&proc->todo)) wake_up_interruptible(&proc->wait); if (ret < 0) { @@ -2835,12 +2883,14 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) err: if (thread) thread->looper &= ~BINDER_LOOPER_STATE_NEED_RETURN; - mutex_unlock(&binder_lock); + binder_unlock(__func__); wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2); if (ret && ret != -ERESTARTSYS) binder_debug(BINDER_DEBUG_TOP_ERRORS, "binder: %d:%d ioctl %x %lx returned %d\n", proc->pid, current->pid, cmd, arg, ret); +err_unlocked: + trace_binder_ioctl_done(ret); return ret; } @@ -3005,13 +3055,16 @@ static int binder_open(struct inode *nodp, struct file *filp) INIT_LIST_HEAD(&proc->todo); init_waitqueue_head(&proc->wait); proc->default_priority = task_nice(current); - mutex_lock(&binder_lock); + + binder_lock(__func__); + binder_stats_created(BINDER_STAT_PROC); 
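+	/* binder_procs and the stats are serialized by binder_main_lock,
+	 * taken above */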
hlist_add_head(&proc->proc_node, &binder_procs); proc->pid = current->group_leader->pid; INIT_LIST_HEAD(&proc->delivered_death); filp->private_data = proc; - mutex_unlock(&binder_lock); + + binder_unlock(__func__); if (binder_debugfs_dir_entry_proc) { char strbuf[11]; @@ -3194,7 +3247,7 @@ static void binder_deferred_func(struct work_struct *work) int defer; do { - mutex_lock(&binder_lock); + binder_lock(__func__); mutex_lock(&binder_deferred_lock); if (!hlist_empty(&binder_deferred_list)) { proc = hlist_entry(binder_deferred_list.first, @@ -3221,7 +3274,7 @@ static void binder_deferred_func(struct work_struct *work) if (defer & BINDER_DEFERRED_RELEASE) binder_deferred_release(proc); /* frees proc */ - mutex_unlock(&binder_lock); + binder_unlock(__func__); if (files) put_files_struct(files); } while (proc); @@ -3562,7 +3615,7 @@ static int binder_state_show(struct seq_file *m, void *unused) int do_lock = !binder_debug_no_lock; if (do_lock) - mutex_lock(&binder_lock); + binder_lock(__func__); seq_puts(m, "binder state:\n"); @@ -3574,7 +3627,7 @@ static int binder_state_show(struct seq_file *m, void *unused) hlist_for_each_entry(proc, pos, &binder_procs, proc_node) print_binder_proc(m, proc, 1); if (do_lock) - mutex_unlock(&binder_lock); + binder_unlock(__func__); return 0; } @@ -3585,7 +3638,7 @@ static int binder_stats_show(struct seq_file *m, void *unused) int do_lock = !binder_debug_no_lock; if (do_lock) - mutex_lock(&binder_lock); + binder_lock(__func__); seq_puts(m, "binder stats:\n"); @@ -3594,7 +3647,7 @@ static int binder_stats_show(struct seq_file *m, void *unused) hlist_for_each_entry(proc, pos, &binder_procs, proc_node) print_binder_proc_stats(m, proc); if (do_lock) - mutex_unlock(&binder_lock); + binder_unlock(__func__); return 0; } @@ -3605,13 +3658,13 @@ static int binder_transactions_show(struct seq_file *m, void *unused) int do_lock = !binder_debug_no_lock; if (do_lock) - mutex_lock(&binder_lock); + binder_lock(__func__); seq_puts(m, "binder transactions:\n"); hlist_for_each_entry(proc, pos, &binder_procs, proc_node) print_binder_proc(m, proc, 0); if (do_lock) - mutex_unlock(&binder_lock); + binder_unlock(__func__); return 0; } @@ -3624,7 +3677,7 @@ static int binder_proc_show(struct seq_file *m, void *unused) bool valid_proc = false; if (do_lock) - mutex_lock(&binder_lock); + binder_lock(__func__); hlist_for_each_entry(itr, pos, &binder_procs, proc_node) { if (itr == proc) { @@ -3637,7 +3690,7 @@ static int binder_proc_show(struct seq_file *m, void *unused) print_binder_proc(m, proc, 1); } if (do_lock) - mutex_unlock(&binder_lock); + binder_unlock(__func__); return 0; } @@ -3732,4 +3785,7 @@ static int __init binder_init(void) device_initcall(binder_init); +#define CREATE_TRACE_POINTS +#include "binder_trace.h" + MODULE_LICENSE("GPL v2"); diff --git a/drivers/staging/android/binder_trace.h b/drivers/staging/android/binder_trace.h new file mode 100644 index 00000000000..82a567c2af6 --- /dev/null +++ b/drivers/staging/android/binder_trace.h @@ -0,0 +1,327 @@ +/* + * Copyright (C) 2012 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM binder + +#if !defined(_BINDER_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _BINDER_TRACE_H + +#include + +struct binder_buffer; +struct binder_node; +struct binder_proc; +struct binder_ref; +struct binder_thread; +struct binder_transaction; + +TRACE_EVENT(binder_ioctl, + TP_PROTO(unsigned int cmd, unsigned long arg), + TP_ARGS(cmd, arg), + + TP_STRUCT__entry( + __field(unsigned int, cmd) + __field(unsigned long, arg) + ), + TP_fast_assign( + __entry->cmd = cmd; + __entry->arg = arg; + ), + TP_printk("cmd=0x%x arg=0x%lx", __entry->cmd, __entry->arg) +); + +DECLARE_EVENT_CLASS(binder_lock_class, + TP_PROTO(const char *tag), + TP_ARGS(tag), + TP_STRUCT__entry( + __field(const char *, tag) + ), + TP_fast_assign( + __entry->tag = tag; + ), + TP_printk("tag=%s", __entry->tag) +); + +#define DEFINE_BINDER_LOCK_EVENT(name) \ +DEFINE_EVENT(binder_lock_class, name, \ + TP_PROTO(const char *func), \ + TP_ARGS(func)) + +DEFINE_BINDER_LOCK_EVENT(binder_lock); +DEFINE_BINDER_LOCK_EVENT(binder_locked); +DEFINE_BINDER_LOCK_EVENT(binder_unlock); + +DECLARE_EVENT_CLASS(binder_function_return_class, + TP_PROTO(int ret), + TP_ARGS(ret), + TP_STRUCT__entry( + __field(int, ret) + ), + TP_fast_assign( + __entry->ret = ret; + ), + TP_printk("ret=%d", __entry->ret) +); + +#define DEFINE_BINDER_FUNCTION_RETURN_EVENT(name) \ +DEFINE_EVENT(binder_function_return_class, name, \ + TP_PROTO(int ret), \ + TP_ARGS(ret)) + +DEFINE_BINDER_FUNCTION_RETURN_EVENT(binder_ioctl_done); +DEFINE_BINDER_FUNCTION_RETURN_EVENT(binder_write_done); +DEFINE_BINDER_FUNCTION_RETURN_EVENT(binder_read_done); + +TRACE_EVENT(binder_wait_for_work, + TP_PROTO(bool proc_work, bool transaction_stack, bool thread_todo), + TP_ARGS(proc_work, transaction_stack, thread_todo), + + TP_STRUCT__entry( + __field(bool, proc_work) + __field(bool, transaction_stack) + __field(bool, thread_todo) + ), + TP_fast_assign( + __entry->proc_work = proc_work; + __entry->transaction_stack = transaction_stack; + __entry->thread_todo = thread_todo; + ), + TP_printk("proc_work=%d transaction_stack=%d thread_todo=%d", + __entry->proc_work, __entry->transaction_stack, + __entry->thread_todo) +); + +TRACE_EVENT(binder_transaction, + TP_PROTO(bool reply, struct binder_transaction *t, + struct binder_node *target_node), + TP_ARGS(reply, t, target_node), + TP_STRUCT__entry( + __field(int, debug_id) + __field(int, target_node) + __field(int, to_proc) + __field(int, to_thread) + __field(int, reply) + __field(unsigned int, code) + __field(unsigned int, flags) + ), + TP_fast_assign( + __entry->debug_id = t->debug_id; + __entry->target_node = target_node ? target_node->debug_id : 0; + __entry->to_proc = t->to_proc->pid; + __entry->to_thread = t->to_thread ? 
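+		/* 0 when no destination thread has been chosen yet */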
t->to_thread->pid : 0; + __entry->reply = reply; + __entry->code = t->code; + __entry->flags = t->flags; + ), + TP_printk("transaction=%d dest_node=%d dest_proc=%d dest_thread=%d reply=%d flags=0x%x code=0x%x", + __entry->debug_id, __entry->target_node, + __entry->to_proc, __entry->to_thread, + __entry->reply, __entry->flags, __entry->code) +); + +TRACE_EVENT(binder_transaction_received, + TP_PROTO(struct binder_transaction *t), + TP_ARGS(t), + + TP_STRUCT__entry( + __field(int, debug_id) + ), + TP_fast_assign( + __entry->debug_id = t->debug_id; + ), + TP_printk("transaction=%d", __entry->debug_id) +); + +TRACE_EVENT(binder_transaction_node_to_ref, + TP_PROTO(struct binder_transaction *t, struct binder_node *node, + struct binder_ref *ref), + TP_ARGS(t, node, ref), + + TP_STRUCT__entry( + __field(int, debug_id) + __field(int, node_debug_id) + __field(void __user *, node_ptr) + __field(int, ref_debug_id) + __field(uint32_t, ref_desc) + ), + TP_fast_assign( + __entry->debug_id = t->debug_id; + __entry->node_debug_id = node->debug_id; + __entry->node_ptr = node->ptr; + __entry->ref_debug_id = ref->debug_id; + __entry->ref_desc = ref->desc; + ), + TP_printk("transaction=%d node=%d src_ptr=0x%p ==> dest_ref=%d dest_desc=%d", + __entry->debug_id, __entry->node_debug_id, __entry->node_ptr, + __entry->ref_debug_id, __entry->ref_desc) +); + +TRACE_EVENT(binder_transaction_ref_to_node, + TP_PROTO(struct binder_transaction *t, struct binder_ref *ref), + TP_ARGS(t, ref), + + TP_STRUCT__entry( + __field(int, debug_id) + __field(int, ref_debug_id) + __field(uint32_t, ref_desc) + __field(int, node_debug_id) + __field(void __user *, node_ptr) + ), + TP_fast_assign( + __entry->debug_id = t->debug_id; + __entry->ref_debug_id = ref->debug_id; + __entry->ref_desc = ref->desc; + __entry->node_debug_id = ref->node->debug_id; + __entry->node_ptr = ref->node->ptr; + ), + TP_printk("transaction=%d node=%d src_ref=%d src_desc=%d ==> dest_ptr=0x%p", + __entry->debug_id, __entry->node_debug_id, + __entry->ref_debug_id, __entry->ref_desc, __entry->node_ptr) +); + +TRACE_EVENT(binder_transaction_ref_to_ref, + TP_PROTO(struct binder_transaction *t, struct binder_ref *src_ref, + struct binder_ref *dest_ref), + TP_ARGS(t, src_ref, dest_ref), + + TP_STRUCT__entry( + __field(int, debug_id) + __field(int, node_debug_id) + __field(int, src_ref_debug_id) + __field(uint32_t, src_ref_desc) + __field(int, dest_ref_debug_id) + __field(uint32_t, dest_ref_desc) + ), + TP_fast_assign( + __entry->debug_id = t->debug_id; + __entry->node_debug_id = src_ref->node->debug_id; + __entry->src_ref_debug_id = src_ref->debug_id; + __entry->src_ref_desc = src_ref->desc; + __entry->dest_ref_debug_id = dest_ref->debug_id; + __entry->dest_ref_desc = dest_ref->desc; + ), + TP_printk("transaction=%d node=%d src_ref=%d src_desc=%d ==> dest_ref=%d dest_desc=%d", + __entry->debug_id, __entry->node_debug_id, + __entry->src_ref_debug_id, __entry->src_ref_desc, + __entry->dest_ref_debug_id, __entry->dest_ref_desc) +); + +TRACE_EVENT(binder_transaction_fd, + TP_PROTO(struct binder_transaction *t, int src_fd, int dest_fd), + TP_ARGS(t, src_fd, dest_fd), + + TP_STRUCT__entry( + __field(int, debug_id) + __field(int, src_fd) + __field(int, dest_fd) + ), + TP_fast_assign( + __entry->debug_id = t->debug_id; + __entry->src_fd = src_fd; + __entry->dest_fd = dest_fd; + ), + TP_printk("transaction=%d src_fd=%d ==> dest_fd=%d", + __entry->debug_id, __entry->src_fd, __entry->dest_fd) +); + +DECLARE_EVENT_CLASS(binder_buffer_class, + TP_PROTO(struct binder_buffer *buf), 
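+	/* field layout shared by the buffer alloc/release events below */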
+ TP_ARGS(buf), + TP_STRUCT__entry( + __field(int, debug_id) + __field(size_t, data_size) + __field(size_t, offsets_size) + ), + TP_fast_assign( + __entry->debug_id = buf->debug_id; + __entry->data_size = buf->data_size; + __entry->offsets_size = buf->offsets_size; + ), + TP_printk("transaction=%d data_size=%zd offsets_size=%zd", + __entry->debug_id, __entry->data_size, __entry->offsets_size) +); + +DEFINE_EVENT(binder_buffer_class, binder_transaction_alloc_buf, + TP_PROTO(struct binder_buffer *buffer), + TP_ARGS(buffer)); + +DEFINE_EVENT(binder_buffer_class, binder_transaction_buffer_release, + TP_PROTO(struct binder_buffer *buffer), + TP_ARGS(buffer)); + +DEFINE_EVENT(binder_buffer_class, binder_transaction_failed_buffer_release, + TP_PROTO(struct binder_buffer *buffer), + TP_ARGS(buffer)); + +TRACE_EVENT(binder_update_page_range, + TP_PROTO(struct binder_proc *proc, bool allocate, + void *start, void *end), + TP_ARGS(proc, allocate, start, end), + TP_STRUCT__entry( + __field(int, proc) + __field(bool, allocate) + __field(size_t, offset) + __field(size_t, size) + ), + TP_fast_assign( + __entry->proc = proc->pid; + __entry->allocate = allocate; + __entry->offset = start - proc->buffer; + __entry->size = end - start; + ), + TP_printk("proc=%d allocate=%d offset=%zu size=%zu", + __entry->proc, __entry->allocate, + __entry->offset, __entry->size) +); + +TRACE_EVENT(binder_command, + TP_PROTO(uint32_t cmd), + TP_ARGS(cmd), + TP_STRUCT__entry( + __field(uint32_t, cmd) + ), + TP_fast_assign( + __entry->cmd = cmd; + ), + TP_printk("cmd=0x%x %s", + __entry->cmd, + _IOC_NR(__entry->cmd) < ARRAY_SIZE(binder_command_strings) ? + binder_command_strings[_IOC_NR(__entry->cmd)] : + "unknown") +); + +TRACE_EVENT(binder_return, + TP_PROTO(uint32_t cmd), + TP_ARGS(cmd), + TP_STRUCT__entry( + __field(uint32_t, cmd) + ), + TP_fast_assign( + __entry->cmd = cmd; + ), + TP_printk("cmd=0x%x %s", + __entry->cmd, + _IOC_NR(__entry->cmd) < ARRAY_SIZE(binder_return_strings) ? + binder_return_strings[_IOC_NR(__entry->cmd)] : + "unknown") +); + +#endif /* _BINDER_TRACE_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE binder_trace +#include diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c index 8d8cef068fc..2e27715f91b 100644 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c @@ -30,11 +30,14 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include #include #include +#include #include #include #include @@ -85,7 +88,7 @@ static unsigned long lowmem_deathpending_timeout; #define lowmem_print(level, x...) 
\ do { \ if (lowmem_debug_level >= (level)) \ - printk(x); \ + pr_info(x); \ } while (0) #if defined(CONFIG_SEC_DEBUG_LMK_MEMINFO) static void dump_tasks_info(void) @@ -157,6 +160,7 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) int tasksize; int i; int min_score_adj = OOM_SCORE_ADJ_MAX + 1; + int minfree = 0; int selected_tasksize = 0; int selected_oom_score_adj; #ifdef CONFIG_SAMP_HOTNESS @@ -184,7 +188,7 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) return 0; } - other_free = global_page_state(NR_FREE_PAGES); + other_free = global_page_state(NR_FREE_PAGES) - totalreserve_pages; nr_cma_free = global_page_state(NR_FREE_CMA_PAGES); #ifdef CONFIG_ZSWAP @@ -200,7 +204,7 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) is_active_high = (global_page_state(NR_ACTIVE_FILE) > global_page_state(NR_INACTIVE_FILE)) ? 1 : 0; #endif - other_file = global_page_state(NR_FILE_PAGES) + zcache_pages(); + other_file = global_page_state(NR_FILE_PAGES) - global_page_state(NR_SHMEM) + zcache_pages(); #if defined(CONFIG_CMA_PAGE_COUNTING) && defined(CONFIG_EXCLUDE_LRU_LIVING_IN_CMA) if (get_nr_swap_pages() < SSWAP_LMK_THRESHOLD && cma_page_ratio >= CMA_PAGE_RATIO @@ -219,8 +223,8 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) if (lowmem_minfree_size < array_size) array_size = lowmem_minfree_size; for (i = 0; i < array_size; i++) { - if (other_free < lowmem_minfree[i] && - other_file < lowmem_minfree[i]) { + minfree = lowmem_minfree[i]; + if (other_free < minfree && other_file < minfree) { min_score_adj = lowmem_adj[i]; break; } @@ -584,9 +588,18 @@ static int android_oom_handler(struct notifier_block *nb, } #else if (selected) { - lowmem_print(1, "oom: send sigkill to %d (%s), adj %d, size %d\n", - selected->pid, selected->comm, - selected_oom_score_adj, selected_tasksize); + lowmem_print(1, "Killing '%s' (%d), adj %d,\n" \ + " to free %ldkB on behalf of '%s' (%d) because\n" \ + " cache %ldkB is below limit %ldkB for oom_score_adj %d\n" \ + " Free memory is %ldkB above reserved\n", + selected->comm, selected->pid, + selected_oom_score_adj, + selected_tasksize * (long)(PAGE_SIZE / 1024), + current->comm, current->pid, + other_file * (long)(PAGE_SIZE / 1024), + minfree * (long)(PAGE_SIZE / 1024), + min_score_adj, + other_free * (long)(PAGE_SIZE / 1024)); send_sig(SIGKILL, selected, 0); set_tsk_thread_flag(selected, TIF_MEMDIE); rem -= selected_tasksize; diff --git a/drivers/staging/android/ram_console.c b/drivers/staging/android/ram_console.c index c778180ff6c..364519269f5 100644 --- a/drivers/staging/android/ram_console.c +++ b/drivers/staging/android/ram_console.c @@ -108,9 +108,6 @@ static ssize_t ram_console_read_old(struct file *file, char __user *buf, char *str; int ret; - if (dmesg_restrict && !capable(CAP_SYSLOG)) - return -EPERM; - /* Main last_kmsg log */ if (pos < old_log_size) { count = min(len, (size_t)(old_log_size - pos)); diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c index 1b7d2c0abd7..8b409d402a3 100644 --- a/drivers/tty/serial/samsung.c +++ b/drivers/tty/serial/samsung.c @@ -427,6 +427,8 @@ static void s3c24xx_serial_shutdown(struct uart_port *port) if (ourport->tx_claimed) { if (!s3c24xx_serial_has_interrupt_mask(port)) free_irq(ourport->tx_irq, ourport); + else + free_irq(port->irq, ourport); tx_enabled(port) = 0; ourport->tx_claimed = 0; } @@ -434,6 +436,8 @@ static void s3c24xx_serial_shutdown(struct uart_port *port) if (ourport->rx_claimed) { if 
(!s3c24xx_serial_has_interrupt_mask(port))
			free_irq(ourport->rx_irq, ourport);
+		/* else already freed above, as s3c64xx_serial_startup()
+		 * will have set both tx_claimed and rx_claimed */
		ourport->rx_claimed = 0;
		rx_enabled(port) = 0;
	}
diff --git a/drivers/usb/gadget/f_accessory.c b/drivers/usb/gadget/f_accessory.c
index 8e8b934e073..7c6d7ed33ac 100644
--- a/drivers/usb/gadget/f_accessory.c
+++ b/drivers/usb/gadget/f_accessory.c
@@ -305,8 +305,10 @@ static void acc_complete_in(struct usb_ep *ep, struct usb_request *req)
 {
 	struct acc_dev *dev = _acc_dev;
 
-	if (req->status != 0)
+	if (req->status == -ESHUTDOWN) {
+		pr_debug("acc_complete_in set disconnected");
 		acc_set_disconnected(dev);
+	}
 
 	req_put(dev, &dev->tx_idle, req);
 
@@ -318,8 +320,10 @@ static void acc_complete_out(struct usb_ep *ep, struct usb_request *req)
 	struct acc_dev *dev = _acc_dev;
 
 	dev->rx_done = 1;
-	if (req->status != 0)
+	if (req->status == -ESHUTDOWN) {
+		pr_debug("acc_complete_out set disconnected");
 		acc_set_disconnected(dev);
+	}
 
 	wake_up(&dev->read_wq);
 }
@@ -601,8 +605,10 @@ static ssize_t acc_read(struct file *fp, char __user *buf,
 
 	pr_debug("acc_read(%d)\n", count);
 
-	if (dev->disconnected)
+	if (dev->disconnected) {
+		pr_debug("acc_read disconnected");
 		return -ENODEV;
+	}
 
 	if (count > BULK_BUFFER_SIZE)
 		count = BULK_BUFFER_SIZE;
@@ -615,6 +621,12 @@ static ssize_t acc_read(struct file *fp, char __user *buf,
 		goto done;
 	}
 
+	if (dev->rx_done) {
+		/* last req was cancelled; try to pick up its data */
+		req = dev->rx_req[0];
+		goto copy_data;
+	}
+
 requeue_req:
 	/* queue a request */
 	req = dev->rx_req[0];
@@ -632,9 +644,17 @@ requeue_req:
 	ret = wait_event_interruptible(dev->read_wq, dev->rx_done);
 	if (ret < 0) {
 		r = ret;
-		usb_ep_dequeue(dev->ep_out, req);
+		ret = usb_ep_dequeue(dev->ep_out, req);
+		if (ret != 0) {
+			/* cancel failed; data may already have been received
+			 * and will be retrieved in the next read */
+			pr_debug("acc_read: cancelling failed %d", ret);
+		}
 		goto done;
 	}
+
+copy_data:
+	dev->rx_done = 0;
 	if (dev->online) {
 		/* If we got a 0-len packet, throw it back and try again. */
 		if (req->actual == 0)
@@ -663,8 +683,10 @@ static ssize_t acc_write(struct file *fp, const char __user *buf,
 
 	pr_debug("acc_write(%d)\n", count);
 
-	if (!dev->online || dev->disconnected)
+	if (!dev->online || dev->disconnected) {
+		pr_debug("acc_write disconnected or not online");
 		return -ENODEV;
+	}
 
 	while (count > 0) {
 		if (!dev->online) {
@@ -682,10 +704,17 @@ static ssize_t acc_write(struct file *fp, const char __user *buf,
 			break;
 		}
 
-		if (count > BULK_BUFFER_SIZE)
+		if (count > BULK_BUFFER_SIZE) {
 			xfer = BULK_BUFFER_SIZE;
-		else
+			/* ZLP: there will be more TX requests, so not yet. */
+			req->zero = 0;
+		} else {
 			xfer = count;
+			/* If the data length is a multiple of the
+			 * maxpacket size then send a zero length packet (ZLP).
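+			 * Otherwise the host would treat a maxpacket-aligned
+			 * transfer as still in progress and keep waiting for
+			 * more data.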
+ */ + req->zero = ((xfer % dev->ep_in->maxpacket) == 0); + } if (copy_from_user(req->buf, buf, xfer)) { r = -EFAULT; break; diff --git a/drivers/usb/gadget/f_fs.c b/drivers/usb/gadget/f_fs.c index 639e7cad740..11d9420ffaa 100644 --- a/drivers/usb/gadget/f_fs.c +++ b/drivers/usb/gadget/f_fs.c @@ -1476,8 +1476,22 @@ static int functionfs_bind_config(struct usb_composite_dev *cdev, static void ffs_func_free(struct ffs_function *func) { + struct ffs_ep *ep = func->eps; + unsigned count = func->ffs->eps_count; + unsigned long flags; + ENTER(); + /* cleanup after autoconfig */ + spin_lock_irqsave(&func->ffs->eps_lock, flags); + do { + if (ep->ep && ep->req) + usb_ep_free_request(ep->ep, ep->req); + ep->req = NULL; + ++ep; + } while (--count); + spin_unlock_irqrestore(&func->ffs->eps_lock, flags); + ffs_data_put(func->ffs); kfree(func->eps); @@ -1522,7 +1536,12 @@ static int ffs_func_eps_enable(struct ffs_function *func) spin_lock_irqsave(&func->ffs->eps_lock, flags); do { struct usb_endpoint_descriptor *ds; - ds = ep->descs[ep->descs[1] ? 1 : 0]; + int desc_idx = ffs->gadget->speed == USB_SPEED_HIGH ? 1 : 0; + ds = ep->descs[desc_idx]; + if (!ds) { + ret = -EINVAL; + break; + } ep->ep->driver_data = ep; ep->ep->desc = ds; diff --git a/drivers/w1/masters/ds2482.c b/drivers/w1/masters/ds2482.c index e5f74416d4b..d409352fe51 100644 --- a/drivers/w1/masters/ds2482.c +++ b/drivers/w1/masters/ds2482.c @@ -18,6 +18,8 @@ #include #include #include +#include +#include #include #include "../w1.h" @@ -84,7 +86,8 @@ static const u8 ds2482_chan_rd[8] = static int ds2482_probe(struct i2c_client *client, const struct i2c_device_id *id); static int ds2482_remove(struct i2c_client *client); - +static int ds2482_suspend(struct device *dev); +static int ds2482_resume(struct device *dev); /** * Driver data (common to all clients) @@ -94,10 +97,16 @@ static const struct i2c_device_id ds2482_id[] = { { } }; +static const struct dev_pm_ops ds2482_pm_ops = { + .suspend = ds2482_suspend, + .resume = ds2482_resume, +}; + static struct i2c_driver ds2482_driver = { .driver = { .owner = THIS_MODULE, .name = "ds2482", + .pm = &ds2482_pm_ops, }, .probe = ds2482_probe, .remove = ds2482_remove, @@ -119,6 +128,7 @@ struct ds2482_w1_chan { struct ds2482_data { struct i2c_client *client; struct mutex access_lock; + int slpz_gpio; /* 1-wire interface(s) */ int w1_count; /* 1 or 8 */ @@ -407,11 +417,31 @@ static u8 ds2482_w1_reset_bus(void *data) return retval; } +static int ds2482_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct ds2482_data *data = i2c_get_clientdata(client); + + if (data->slpz_gpio >= 0) + gpio_set_value(data->slpz_gpio, 0); + return 0; +} + +static int ds2482_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct ds2482_data *data = i2c_get_clientdata(client); + + if (data->slpz_gpio >= 0) + gpio_set_value(data->slpz_gpio, 1); + return 0; +} static int ds2482_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct ds2482_data *data; + struct ds2482_platform_data *pdata; int err = -ENODEV; int temp1; int idx; @@ -476,6 +506,16 @@ static int ds2482_probe(struct i2c_client *client, } } + pdata = client->dev.platform_data; + data->slpz_gpio = pdata ? 
pdata->slpz_gpio : -1; + + if (data->slpz_gpio >= 0) { + err = gpio_request_one(data->slpz_gpio, GPIOF_OUT_INIT_HIGH, + "ds2482.slpz"); + if (err < 0) + goto exit_w1_remove; + } + return 0; exit_w1_remove: @@ -500,6 +540,11 @@ static int ds2482_remove(struct i2c_client *client) w1_remove_master_device(&data->w1_ch[idx].w1_bm); } + if (data->slpz_gpio >= 0) { + gpio_set_value(data->slpz_gpio, 0); + gpio_free(data->slpz_gpio); + } + /* Free the memory */ kfree(data); return 0; diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 103e56ceb38..955b9c2a93a 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -340,7 +340,7 @@ wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ) { int error; - error = wait_event_freezekillable(server->response_q, + error = wait_event_freezekillable_unsafe(server->response_q, midQ->mid_state != MID_REQUEST_SUBMITTED); if (error < 0) return -ERESTARTSYS; diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 33c9599c14e..e5b68f2e0ac 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -33,6 +33,8 @@ #include #include #include +#include +#include #include #include #include @@ -87,7 +89,7 @@ */ /* Epoll private bits inside the event mask */ -#define EP_PRIVATE_BITS (EPOLLONESHOT | EPOLLET) +#define EP_PRIVATE_BITS (EPOLLWAKEUP | EPOLLONESHOT | EPOLLET) /* Maximum number of nesting allowed inside epoll sets */ #define EP_MAX_NESTS 4 @@ -154,6 +156,9 @@ struct epitem { /* List header used to link this item to the "struct file" items list */ struct list_head fllink; + /* wakeup_source used when EPOLLWAKEUP is set */ + struct wakeup_source *ws; + /* The structure that describe the interested events and the source fd */ struct epoll_event event; }; @@ -194,6 +199,9 @@ struct eventpoll { */ struct epitem *ovflist; + /* wakeup_source used when ep_scan_ready_list is running */ + struct wakeup_source *ws; + /* The user that created the eventpoll descriptor */ struct user_struct *user; @@ -588,8 +596,10 @@ static int ep_scan_ready_list(struct eventpoll *ep, * queued into ->ovflist but the "txlist" might already * contain them, and the list_splice() below takes care of them. */ - if (!ep_is_linked(&epi->rdllink)) + if (!ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); + __pm_stay_awake(epi->ws); + } } /* * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after @@ -602,6 +612,7 @@ static int ep_scan_ready_list(struct eventpoll *ep, * Quickly re-inject items left on "txlist". */ list_splice(&txlist, &ep->rdllist); + __pm_relax(ep->ws); if (!list_empty(&ep->rdllist)) { /* @@ -656,6 +667,8 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) list_del_init(&epi->rdllink); spin_unlock_irqrestore(&ep->lock, flags); + wakeup_source_unregister(epi->ws); + /* At this point it is safe to free the eventpoll item */ kmem_cache_free(epi_cache, epi); @@ -706,6 +719,7 @@ static void ep_free(struct eventpoll *ep) mutex_unlock(&epmutex); mutex_destroy(&ep->mtx); free_uid(ep->user); + wakeup_source_unregister(ep->ws); kfree(ep); } @@ -737,6 +751,7 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head, * callback, but it's not actually ready, as far as * caller requested events goes. We can remove it here. 
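+ * (The item leaves the ready list without being delivered to userspace, + * so its wakeup source must be released as well; hence the __pm_relax() + * added here.)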
*/ + __pm_relax(epi->ws); list_del_init(&epi->rdllink); } } @@ -927,13 +942,23 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k if (epi->next == EP_UNACTIVE_PTR) { epi->next = ep->ovflist; ep->ovflist = epi; + if (epi->ws) { + /* + * Activate ep->ws since epi->ws may get + * deactivated at any time. + */ + __pm_stay_awake(ep->ws); + } + } goto out_unlock; } /* If this file is already in the ready list we exit soon */ - if (!ep_is_linked(&epi->rdllink)) + if (!ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); + __pm_stay_awake(epi->ws); + } /* * Wake up ( if active ) both the eventpoll wait list and the ->poll() @@ -1091,6 +1116,30 @@ static int reverse_path_check(void) return error; } +static int ep_create_wakeup_source(struct epitem *epi) +{ + const char *name; + + if (!epi->ep->ws) { + epi->ep->ws = wakeup_source_register("eventpoll"); + if (!epi->ep->ws) + return -ENOMEM; + } + + name = epi->ffd.file->f_path.dentry->d_name.name; + epi->ws = wakeup_source_register(name); + if (!epi->ws) + return -ENOMEM; + + return 0; +} + +static void ep_destroy_wakeup_source(struct epitem *epi) +{ + wakeup_source_unregister(epi->ws); + epi->ws = NULL; +} + /* * Must be called with "mtx" held. */ @@ -1118,6 +1167,13 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, epi->event = *event; epi->nwait = 0; epi->next = EP_UNACTIVE_PTR; + if (epi->event.events & EPOLLWAKEUP) { + error = ep_create_wakeup_source(epi); + if (error) + goto error_create_wakeup_source; + } else { + epi->ws = NULL; + } /* Initialize the poll table using the queue callback */ epq.epi = epi; @@ -1164,6 +1220,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, /* If the file is already "ready" we drop it inside the ready list */ if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); + __pm_stay_awake(epi->ws); /* Notify waiting tasks that events are available */ if (waitqueue_active(&ep->wq)) @@ -1204,6 +1261,9 @@ error_unregister: list_del_init(&epi->rdllink); spin_unlock_irqrestore(&ep->lock, flags); + wakeup_source_unregister(epi->ws); + +error_create_wakeup_source: kmem_cache_free(epi_cache, epi); return error; @@ -1229,6 +1289,12 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even epi->event.events = event->events; /* need barrier below */ pt._key = event->events; epi->event.data = event->data; /* protected by mtx */ + if (epi->event.events & EPOLLWAKEUP) { + if (!epi->ws) + ep_create_wakeup_source(epi); + } else if (epi->ws) { + ep_destroy_wakeup_source(epi); + } /* * The following barrier has two effects: @@ -1264,6 +1330,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even spin_lock_irq(&ep->lock); if (!ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); + __pm_stay_awake(epi->ws); /* Notify waiting tasks that events are available */ if (waitqueue_active(&ep->wq)) @@ -1302,6 +1369,18 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, !list_empty(head) && eventcnt < esed->maxevents;) { epi = list_first_entry(head, struct epitem, rdllink); + /* + * Activate ep->ws before deactivating epi->ws to prevent + * triggering auto-suspend here (in case we reactivate epi->ws + * below). + * + * This could be rearranged to delay the deactivation of epi->ws + * instead, but then epi->ws would temporarily be out of sync + * with ep_is_linked().
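+ * + * Either way at least one of the two wakeup sources is active at every + * point in this loop, so a wakeup event cannot be lost while it is being + * handed over to userspace.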
+ */ + if (epi->ws && epi->ws->active) + __pm_stay_awake(ep->ws); + __pm_relax(epi->ws); list_del_init(&epi->rdllink); pt._key = epi->event.events; @@ -1318,6 +1397,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, if (__put_user(revents, &uevent->events) || __put_user(epi->event.data, &uevent->data)) { list_add(&epi->rdllink, head); + __pm_stay_awake(epi->ws); return eventcnt ? eventcnt : -EFAULT; } eventcnt++; @@ -1337,6 +1417,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, * poll callback will queue them in ep->ovflist. */ list_add_tail(&epi->rdllink, &ep->rdllist); + __pm_stay_awake(epi->ws); } } } @@ -1435,7 +1516,8 @@ fetch_events: } spin_unlock_irqrestore(&ep->lock, flags); - if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) + if (!freezable_schedule_hrtimeout_range(to, slack, + HRTIMER_MODE_ABS)) timed_out = 1; spin_lock_irqsave(&ep->lock, flags); @@ -1649,6 +1731,10 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, if (!tfile->f_op || !tfile->f_op->poll) goto error_tgt_fput; + /* Check if EPOLLWAKEUP is allowed */ + if ((epds.events & EPOLLWAKEUP) && !capable(CAP_EPOLLWAKEUP)) + epds.events &= ~EPOLLWAKEUP; + /* * We have to check that the file structure underneath the file descriptor * the user passed to us _is_ an eventpoll file. And also we do not permit diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 1987fac9c8f..b5a94beaf7e 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1936,7 +1936,8 @@ extern int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t i, struct ext4_group_desc *desc); extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, ext4_fsblk_t block, unsigned long count); -extern int ext4_trim_fs(struct super_block *, struct fstrim_range *); +extern int ext4_trim_fs(struct super_block *, struct fstrim_range *, + unsigned long blkdev_flags); /* inode.c */ struct buffer_head *ext4_getblk(handle_t *, struct inode *, diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 2e09d35b06e..48f34d08f03 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -828,6 +828,7 @@ got: } else { ext4_lock_group(sb, group); } + ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1); if (S_ISDIR(mode)) { ext4_used_dirs_set(sb, gdp, ext4_used_dirs_count(sb, gdp) + 1); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index efaf7c8a904..7ddc2033f08 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -400,11 +400,13 @@ resizefs_out: return err; } + case FIDTRIM: case FITRIM: { struct request_queue *q = bdev_get_queue(sb->s_bdev); struct fstrim_range range; int ret = 0; + int flags = cmd == FIDTRIM ? 
BLKDEV_DISCARD_SECURE : 0; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -419,13 +421,15 @@ resizefs_out: return -EOPNOTSUPP; } + if ((flags & BLKDEV_DISCARD_SECURE) && !blk_queue_secdiscard(q)) + return -EOPNOTSUPP; if (copy_from_user(&range, (struct fstrim_range __user *)arg, sizeof(range))) return -EFAULT; range.minlen = max((unsigned int)range.minlen, q->limits.discard_granularity); - ret = ext4_trim_fs(sb, &range); + ret = ext4_trim_fs(sb, &range, flags); if (ret < 0) return ret; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 7363f9ce1ce..aad14a24d75 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2596,7 +2596,8 @@ int ext4_mb_release(struct super_block *sb) } static inline int ext4_issue_discard(struct super_block *sb, - ext4_group_t block_group, ext4_grpblk_t cluster, int count) + ext4_group_t block_group, ext4_grpblk_t cluster, int count, + unsigned long flags) { ext4_fsblk_t discard_block; @@ -2605,7 +2606,7 @@ static inline int ext4_issue_discard(struct super_block *sb, count = EXT4_C2B(EXT4_SB(sb), count); trace_ext4_discard_blocks(sb, (unsigned long long) discard_block, count); - return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); + return sb_issue_discard(sb, discard_block, count, GFP_NOFS, flags); } /* @@ -2626,7 +2627,7 @@ static void ext4_free_data_callback(struct super_block *sb, if (test_opt(sb, DISCARD)) ext4_issue_discard(sb, entry->efd_group, - entry->efd_start_cluster, entry->efd_count); + entry->efd_start_cluster, entry->efd_count, 0); err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b); /* we expect to find existing buddy because it's pinned */ @@ -4891,13 +4892,15 @@ error_return: * @count: number of blocks to TRIM * @group: alloc. group we are working with * @e4b: ext4 buddy for the group + * @blkdev_flags: flags for the block device * * Trim "count" blocks starting at "start" in the "group". To assure that no * one will allocate those blocks, mark it as used in buddy bitmap. This must * be called with under the group lock. */ static void ext4_trim_extent(struct super_block *sb, int start, int count, - ext4_group_t group, struct ext4_buddy *e4b) + ext4_group_t group, struct ext4_buddy *e4b, + unsigned long blkdev_flags) { struct ext4_free_extent ex; @@ -4915,7 +4918,7 @@ static void ext4_trim_extent(struct super_block *sb, int start, int count, */ mb_mark_used(e4b, &ex); ext4_unlock_group(sb, group); - ext4_issue_discard(sb, group, start, count); + ext4_issue_discard(sb, group, start, count, blkdev_flags); ext4_lock_group(sb, group); mb_free_blocks(NULL, e4b, start, ex.fe_len); } @@ -4927,6 +4930,7 @@ static void ext4_trim_extent(struct super_block *sb, int start, int count, * @start: first group block to examine * @max: last group block to examine * @minblocks: minimum extent block count + * @blkdev_flags: flags for the block device * * ext4_trim_all_free walks through group's buddy bitmap searching for free * extents. 
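+ * (For FIDTRIM requests blkdev_flags carries BLKDEV_DISCARD_SECURE, so + * each free extent is issued to the device as a secure discard.)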
When the free block is found, ext4_trim_extent is called to TRIM @@ -4941,7 +4945,7 @@ static void ext4_trim_extent(struct super_block *sb, int start, int count, static ext4_grpblk_t ext4_trim_all_free(struct super_block *sb, ext4_group_t group, ext4_grpblk_t start, ext4_grpblk_t max, - ext4_grpblk_t minblocks) + ext4_grpblk_t minblocks, unsigned long blkdev_flags) { void *bitmap; ext4_grpblk_t next, count = 0, free_count = 0; @@ -4974,7 +4978,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, if ((next - start) >= minblocks) { ext4_trim_extent(sb, start, - next - start, group, &e4b); + next - start, group, &e4b, blkdev_flags); count += next - start; } free_count += next - start; @@ -5011,6 +5015,7 @@ out: * ext4_trim_fs() -- trim ioctl handle function * @sb: superblock for filesystem * @range: fstrim_range structure + * @blkdev_flags: flags for the block device * * start: First Byte to trim * len: number of Bytes to trim from start @@ -5019,7 +5024,8 @@ out: * start to start+len. For each such a group ext4_trim_all_free function * is invoked to trim all free space. */ -int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) +int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range, + unsigned long blkdev_flags) { struct ext4_group_info *grp; ext4_group_t group, first_group, last_group; @@ -5074,7 +5080,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) if (grp->bb_free >= minlen) { cnt = ext4_trim_all_free(sb, group, first_cluster, - end, minlen); + end, minlen, blkdev_flags); if (cnt < 0) { ret = cnt; break; diff --git a/fs/mount.h b/fs/mount.h index 8c02f00bebb..cd500798040 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -4,10 +4,10 @@ struct mnt_namespace { atomic_t count; - unsigned int proc_inum; + unsigned int proc_inum; struct mount * root; struct list_head list; - struct user_namespace *user_ns; + struct user_namespace *user_ns; u64 seq; /* Sequence number to prevent loops */ wait_queue_head_t poll; int event; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index bf980b5821c..5c6e0b5d493 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -79,7 +79,7 @@ int nfs_wait_bit_killable(void *word) { if (fatal_signal_pending(current)) return -ERESTARTSYS; - freezable_schedule(); + freezable_schedule_unsafe(); return 0; } diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 51017baa67a..5e652f4143e 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -33,7 +33,7 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) res = rpc_call_sync(clnt, msg, flags); if (res != -EJUKEBOX) break; - freezable_schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME); + freezable_schedule_timeout_killable_unsafe(NFS_JUKEBOX_RETRY_TIME); res = -ERESTARTSYS; } while (!fatal_signal_pending(current)); return res; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 92eff4da006..119839f09df 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -250,7 +250,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) *timeout = NFS4_POLL_RETRY_MIN; if (*timeout > NFS4_POLL_RETRY_MAX) *timeout = NFS4_POLL_RETRY_MAX; - freezable_schedule_timeout_killable(*timeout); + freezable_schedule_timeout_killable_unsafe(*timeout); if (fatal_signal_pending(current)) res = -ERESTARTSYS; *timeout <<= 1; @@ -4198,7 +4198,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4 static unsigned long nfs4_set_lock_task_retry(unsigned long timeout) { - freezable_schedule_timeout_killable(timeout); + 
freezable_schedule_timeout_killable_unsafe(timeout); timeout <<= 1; if (timeout > NFS4_LOCK_MAXTIMEOUT) return NFS4_LOCK_MAXTIMEOUT; diff --git a/fs/pnode.h b/fs/pnode.h index 7326d98340f..ff20a615ab7 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -25,7 +25,7 @@ #define CL_COPY_ALL 0x04 #define CL_MAKE_SHARED 0x08 #define CL_PRIVATE 0x10 -#define CL_SHARED_TO_SLAVE 0x20 +#define CL_SHARED_TO_SLAVE 0x20 static inline void set_mnt_shared(struct mount *mnt) { diff --git a/fs/proc/base.c b/fs/proc/base.c index 908b802ce25..cbf74f3d9f1 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -137,12 +137,6 @@ struct pid_entry { static int proc_fd_permission(struct inode *inode, int mask); -/* ANDROID is for special files in /proc. */ -#define ANDROID(NAME, MODE, OTYPE) \ - NOD(NAME, (S_IFREG|(MODE)), \ - &proc_##OTYPE##_inode_operations, \ - &proc_##OTYPE##_operations, {}) - /* * Count the number of hardlinks for the pid_entry table, excluding the . * and .. links. @@ -2410,11 +2404,19 @@ static const struct file_operations proc_map_files_operations = { */ static int proc_fd_permission(struct inode *inode, int mask) { - int rv = generic_permission(inode, mask); + struct task_struct *p; + int rv; + + rv = generic_permission(inode, mask); if (rv == 0) - return 0; - if (task_pid(current) == proc_pid(inode)) + return rv; + + rcu_read_lock(); + p = pid_task(proc_pid(inode), PIDTYPE_PID); + if (p && same_thread_group(p, current)) rv = 0; + rcu_read_unlock(); + return rv; } @@ -3084,8 +3086,8 @@ static const struct pid_entry tgid_base_stuff[] = { REG("cgroup", S_IRUGO, proc_cgroup_operations), #endif INF("oom_score", S_IRUGO, proc_oom_score), - ANDROID("oom_adj",S_IRUGO|S_IWUSR, oom_adjust), - REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), + REG("oom_adj", S_IRUSR, proc_oom_adjust_operations), + REG("oom_score_adj", S_IRUSR, proc_oom_score_adj_operations), #ifdef CONFIG_SAMP_HOTNESS REG("hotness_adj", S_IWUGO, proc_hotness_adjust_operations), #endif @@ -3445,8 +3447,8 @@ static const struct pid_entry tid_base_stuff[] = { REG("cgroup", S_IRUGO, proc_cgroup_operations), #endif INF("oom_score", S_IRUGO, proc_oom_score), - REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations), - REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), + REG("oom_adj", S_IRUSR, proc_oom_adjust_operations), + REG("oom_score_adj", S_IRUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), REG("sessionid", S_IRUGO, proc_sessionid_operations), diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 5a7318873b4..6936179207e 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -749,6 +749,7 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, #define CLEAR_REFS_ALL 1 #define CLEAR_REFS_ANON 2 #define CLEAR_REFS_MAPPED 3 +#define CLEAR_REFS_MM_HIWATER_RSS 5 static ssize_t clear_refs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) @@ -768,7 +769,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, rv = kstrtoint(strstrip(buffer), 10, &type); if (rv < 0) return rv; - if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) + if ((type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) && + type != CLEAR_REFS_MM_HIWATER_RSS) return -EINVAL; task = get_proc_task(file->f_path.dentry->d_inode); if (!task) @@ -779,6 +781,18 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, .pmd_entry = clear_refs_pte_range, .mm = mm, }; + + if (type == 
CLEAR_REFS_MM_HIWATER_RSS) { + /* + * Writing 5 to /proc/pid/clear_refs resets the peak + * resident set size to this mm's current rss value. + */ + down_write(&mm->mmap_sem); + reset_mm_hiwater_rss(mm); + up_write(&mm->mmap_sem); + goto out_mm; + } + down_read(&mm->mmap_sem); for (vma = mm->mmap; vma; vma = vma->vm_next) { clear_refs_walk.private = vma; @@ -802,6 +816,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, } flush_tlb_mm(mm); up_read(&mm->mmap_sem); +out_mm: mmput(mm); } put_task_struct(task); diff --git a/fs/select.c b/fs/select.c index 0baa0a351a1..3954a668bee 100644 --- a/fs/select.c +++ b/fs/select.c @@ -26,6 +26,7 @@ #include #include #include +#include #include @@ -236,7 +237,8 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state, set_current_state(state); if (!pwq->triggered) - rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS); + rc = freezable_schedule_hrtimeout_range(expires, slack, + HRTIMER_MODE_ABS); __set_current_state(TASK_RUNNING); /* diff --git a/fs/timerfd.c b/fs/timerfd.c index cce622d35fb..4cf18d63ccb 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -115,8 +115,9 @@ static bool timerfd_canceled(struct timerfd_ctx *ctx) static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags) { spin_lock(&ctx->cancel_lock); - if (ctx->clockid == CLOCK_REALTIME && (flags & TFD_TIMER_ABSTIME) && - (flags & TFD_TIMER_CANCEL_ON_SET)) { + if ((ctx->clockid == CLOCK_REALTIME || + ctx->clockid == CLOCK_REALTIME_ALARM) && + (flags & TFD_TIMER_ABSTIME) && (flags & TFD_TIMER_CANCEL_ON_SET)) { if (!ctx->might_cancel) { ctx->might_cancel = true; spin_lock(&cancel_lock); @@ -270,7 +271,10 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) if ((flags & ~TFD_CREATE_FLAGS) || (clockid != CLOCK_MONOTONIC && - clockid != CLOCK_REALTIME)) + clockid != CLOCK_REALTIME && + clockid != CLOCK_REALTIME_ALARM && + clockid != CLOCK_BOOTTIME && + clockid != CLOCK_BOOTTIME_ALARM)) return -EINVAL; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); diff --git a/include/linux/alarmtimer.h b/include/linux/alarmtimer.h index 96c5c249b08..e0520cbd1f6 100644 --- a/include/linux/alarmtimer.h +++ b/include/linux/alarmtimer.h @@ -35,6 +35,7 @@ enum alarmtimer_restart { */ struct alarm { struct timerqueue_node node; + struct hrtimer timer; enum alarmtimer_restart (*function)(struct alarm *, ktime_t now); enum alarmtimer_type type; int state; @@ -43,11 +44,15 @@ struct alarm { void alarm_init(struct alarm *alarm, enum alarmtimer_type type, enum alarmtimer_restart (*function)(struct alarm *, ktime_t)); -void alarm_start(struct alarm *alarm, ktime_t start); +int alarm_start(struct alarm *alarm, ktime_t start); +int alarm_start_relative(struct alarm *alarm, ktime_t start); +void alarm_restart(struct alarm *alarm); int alarm_try_to_cancel(struct alarm *alarm); int alarm_cancel(struct alarm *alarm); u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval); +u64 alarm_forward_now(struct alarm *alarm, ktime_t interval); +ktime_t alarm_expires_remaining(const struct alarm *alarm); /* * A alarmtimer is active, when it is enqueued into timerqueue or the diff --git a/include/linux/android_aid.h b/include/linux/android_aid.h index 0f904b3ba7f..06264b8be5f 100644 --- a/include/linux/android_aid.h +++ b/include/linux/android_aid.h @@ -17,8 +17,8 @@ #define _LINUX_ANDROID_AID_H /* AIDs that the kernel treats differently */ -#define AID_NET_BT_ADMIN 3001 -#define AID_NET_BT 3002 +#define AID_OBSOLETE_000 3001 /* was NET_BT_ADMIN */ +#define 
AID_OBSOLETE_001 3002 /* was NET_BT */ #define AID_INET 3003 #define AID_NET_RAW 3004 #define AID_NET_ADMIN 3005 diff --git a/include/linux/ashmem.h b/include/linux/ashmem.h index 85b5b686341..e20b1f546a7 100644 --- a/include/linux/ashmem.h +++ b/include/linux/ashmem.h @@ -14,6 +14,7 @@ #include #include +#include #define ASHMEM_NAME_LEN 256 diff --git a/include/linux/capability.h b/include/linux/capability.h index 12d52dedb22..c398cff3dab 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -360,8 +360,11 @@ struct cpu_vfs_cap_data { #define CAP_WAKE_ALARM 35 +/* Allow preventing system suspends while epoll events are pending */ -#define CAP_LAST_CAP CAP_WAKE_ALARM +#define CAP_EPOLLWAKEUP 36 + +#define CAP_LAST_CAP CAP_EPOLLWAKEUP #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP) diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h index 3bd46f76675..a975de1ff59 100644 --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h @@ -51,7 +51,7 @@ struct task_struct; extern void debug_show_all_locks(void); extern void debug_show_held_locks(struct task_struct *task); extern void debug_check_no_locks_freed(const void *from, unsigned long len); -extern void debug_check_no_locks_held(struct task_struct *task); +extern void debug_check_no_locks_held(void); #else static inline void debug_show_all_locks(void) { @@ -67,7 +67,7 @@ debug_check_no_locks_freed(const void *from, unsigned long len) } static inline void -debug_check_no_locks_held(struct task_struct *task) +debug_check_no_locks_held(void) { } #endif diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index 657ab55beda..6f8be328770 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -26,6 +26,18 @@ #define EPOLL_CTL_DEL 2 #define EPOLL_CTL_MOD 3 +/* + * Request the handling of system wakeup events so as to prevent system suspends + * from happening while those events are being processed. + * + * Assuming neither EPOLLET nor EPOLLONESHOT is set, system suspends will not be + * re-allowed until epoll_wait is called again after consuming the wakeup + * event(s). + * + * Requires CAP_EPOLLWAKEUP + */ +#define EPOLLWAKEUP (1 << 29) + /* Set the One Shot behaviour for the target file descriptor */ #define EPOLLONESHOT (1 << 30) diff --git a/include/linux/freezer.h b/include/linux/freezer.h index e9a15b22c9b..ef22a8c243a 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -3,6 +3,7 @@ #ifndef FREEZER_H_INCLUDED #define FREEZER_H_INCLUDED +#include #include #include #include @@ -41,7 +42,22 @@ extern int freeze_kernel_threads(void); extern void thaw_processes(void); extern void thaw_kernel_threads(void); -static inline bool try_to_freeze(void) +/* + * HACK: prevent "sleeping while atomic" warnings due to ARM signal handling + * disabling irqs + */ +static inline bool try_to_freeze_nowarn(void) +{ + if (likely(!freezing(current))) + return false; + return __refrigerator(false); +} + +/* + * DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION + * If try_to_freeze causes a lockdep warning it means the caller may deadlock + */ +static inline bool try_to_freeze_unsafe(void) { /* This causes problems for ARM targets and is a known * problem upstream.
@@ -52,6 +68,13 @@ static inline bool try_to_freeze(void) return __refrigerator(false); } +static inline bool try_to_freeze(void) +{ + if (!(current->flags & PF_NOFREEZE)) + debug_check_no_locks_held(); + return try_to_freeze_unsafe(); +} + extern bool freeze_task(struct task_struct *p); extern bool set_freezable(void); @@ -113,6 +136,14 @@ static inline void freezer_count(void) try_to_freeze(); } +/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */ +static inline void freezer_count_unsafe(void) +{ + current->flags &= ~PF_FREEZER_SKIP; + smp_mb(); + try_to_freeze_unsafe(); +} + /** * freezer_should_skip - whether to skip a task when determining frozen * state is reached @@ -137,29 +168,87 @@ static inline bool freezer_should_skip(struct task_struct *p) } /* - * These macros are intended to be used whenever you want allow a task that's + * These functions are intended to be used whenever you want to allow a task that's * sleeping in TASK_UNINTERRUPTIBLE or TASK_KILLABLE state to be frozen. Note * that neither return any clear indication of whether a freeze event happened * while in this function. */ /* Like schedule(), but should not block the freezer. */ -#define freezable_schedule() \ -({ \ - freezer_do_not_count(); \ - schedule(); \ - freezer_count(); \ -}) +static inline void freezable_schedule(void) +{ + freezer_do_not_count(); + schedule(); + freezer_count(); +} + +/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */ +static inline void freezable_schedule_unsafe(void) +{ + freezer_do_not_count(); + schedule(); + freezer_count_unsafe(); +} + +/* + * Like schedule_timeout(), but should not block the freezer. Do not + * call this with locks held. + */ +static inline long freezable_schedule_timeout(long timeout) +{ + long __retval; + freezer_do_not_count(); + __retval = schedule_timeout(timeout); + freezer_count(); + return __retval; +} + +/* + * Like schedule_timeout_interruptible(), but should not block the freezer. Do not + * call this with locks held. + */ +static inline long freezable_schedule_timeout_interruptible(long timeout) +{ + long __retval; + freezer_do_not_count(); + __retval = schedule_timeout_interruptible(timeout); + freezer_count(); + return __retval; +} /* Like schedule_timeout_killable(), but should not block the freezer. */ -#define freezable_schedule_timeout_killable(timeout) \ -({ \ - long __retval; \ - freezer_do_not_count(); \ - __retval = schedule_timeout_killable(timeout); \ - freezer_count(); \ - __retval; \ -}) +static inline long freezable_schedule_timeout_killable(long timeout) +{ + long __retval; + freezer_do_not_count(); + __retval = schedule_timeout_killable(timeout); + freezer_count(); + return __retval; +} + +/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */ +static inline long freezable_schedule_timeout_killable_unsafe(long timeout) +{ + long __retval; + freezer_do_not_count(); + __retval = schedule_timeout_killable(timeout); + freezer_count_unsafe(); + return __retval; +} + +/* + * Like schedule_hrtimeout_range(), but should not block the freezer. Do not + * call this with locks held.
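+ * (poll_schedule_timeout() and ep_poll() are converted to this helper + * elsewhere in this patch, so that tasks blocked in select/poll/epoll_wait + * can enter the freezer while they sleep.)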
+ */ +static inline int freezable_schedule_hrtimeout_range(ktime_t *expires, + unsigned long delta, const enum hrtimer_mode mode) +{ + int __retval; + freezer_do_not_count(); + __retval = schedule_hrtimeout_range(expires, delta, mode); + freezer_count(); + return __retval; +} /* * Freezer-friendly wrappers around wait_event_interruptible(), @@ -176,33 +265,45 @@ static inline bool freezer_should_skip(struct task_struct *p) __retval; \ }) +/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */ +#define wait_event_freezekillable_unsafe(wq, condition) \ +({ \ + int __retval; \ + freezer_do_not_count(); \ + __retval = wait_event_killable(wq, (condition)); \ + freezer_count_unsafe(); \ + __retval; \ +}) + #define wait_event_freezable(wq, condition) \ ({ \ int __retval; \ - for (;;) { \ - __retval = wait_event_interruptible(wq, \ - (condition) || freezing(current)); \ - if (__retval || (condition)) \ - break; \ - try_to_freeze(); \ - } \ + freezer_do_not_count(); \ + __retval = wait_event_interruptible(wq, (condition)); \ + freezer_count(); \ __retval; \ }) #define wait_event_freezable_timeout(wq, condition, timeout) \ ({ \ long __retval = timeout; \ - for (;;) { \ - __retval = wait_event_interruptible_timeout(wq, \ - (condition) || freezing(current), \ - __retval); \ - if (__retval <= 0 || (condition)) \ - break; \ - try_to_freeze(); \ - } \ + freezer_do_not_count(); \ + __retval = wait_event_interruptible_timeout(wq, (condition), \ + __retval); \ + freezer_count(); \ __retval; \ }) +#define wait_event_freezable_exclusive(wq, condition) \ +({ \ + int __retval; \ + freezer_do_not_count(); \ + __retval = wait_event_interruptible_exclusive(wq, condition); \ + freezer_count(); \ + __retval; \ +}) + + #else /* !CONFIG_FREEZER */ static inline bool frozen(struct task_struct *p) { return false; } static inline bool freezing(struct task_struct *p) { return false; } @@ -214,6 +315,7 @@ static inline int freeze_kernel_threads(void) { return -ENOSYS; } static inline void thaw_processes(void) {} static inline void thaw_kernel_threads(void) {} +static inline bool try_to_freeze_nowarn(void) { return false; } static inline bool try_to_freeze(void) { return false; } static inline void freezer_do_not_count(void) {} @@ -223,18 +325,37 @@ static inline void set_freezable(void) {} #define freezable_schedule() schedule() +#define freezable_schedule_unsafe() schedule() + +#define freezable_schedule_timeout(timeout) schedule_timeout(timeout) + +#define freezable_schedule_timeout_interruptible(timeout) \ + schedule_timeout_interruptible(timeout) + #define freezable_schedule_timeout_killable(timeout) \ schedule_timeout_killable(timeout) +#define freezable_schedule_timeout_killable_unsafe(timeout) \ + schedule_timeout_killable(timeout) + +#define freezable_schedule_hrtimeout_range(expires, delta, mode) \ + schedule_hrtimeout_range(expires, delta, mode) + #define wait_event_freezable(wq, condition) \ wait_event_interruptible(wq, condition) #define wait_event_freezable_timeout(wq, condition, timeout) \ wait_event_interruptible_timeout(wq, condition, timeout) +#define wait_event_freezable_exclusive(wq, condition) \ + wait_event_interruptible_exclusive(wq, condition) + #define wait_event_freezekillable(wq, condition) \ wait_event_killable(wq, condition) +#define wait_event_freezekillable_unsafe(wq, condition) \ + wait_event_killable(wq, condition) + #endif /* !CONFIG_FREEZER */ #endif /* FREEZER_H_INCLUDED */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 89bf453c0d8..25ba7b5203f 100644 --- a/include/linux/fs.h +++ 
b/include/linux/fs.h @@ -339,6 +339,8 @@ struct inodes_stat_t { #define FITHAW _IOWR('X', 120, int) /* Thaw */ #define FITRIM _IOWR('X', 121, struct fstrim_range) /* Trim */ +#define FIDTRIM _IOWR('f', 128, struct fstrim_range) /* Deep discard trim */ + #define FS_IOC_GETFLAGS _IOR('f', 1, long) #define FS_IOC_SETFLAGS _IOW('f', 2, long) #define FS_IOC_GETVERSION _IOR('v', 1, long) @@ -1885,13 +1887,13 @@ int sync_inode_metadata(struct inode *inode, int wait); struct file_system_type { const char *name; int fs_flags; -#define FS_REQUIRES_DEV 1 -#define FS_BINARY_MOUNTDATA 2 -#define FS_HAS_SUBTYPE 4 -#define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ -#define FS_USERNS_DEV_MOUNT 16 /* A userns mount does not imply MNT_NODEV */ -#define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ -#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ +#define FS_REQUIRES_DEV 1 +#define FS_BINARY_MOUNTDATA 2 +#define FS_HAS_SUBTYPE 4 +#define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ +#define FS_USERNS_DEV_MOUNT 16 /* A userns mount does not imply MNT_NODEV */ +#define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ +#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ struct dentry *(*mount) (struct file_system_type *, int, const char *, void *); struct dentry *(*mount2) (struct vfsmount *, struct file_system_type *, int, diff --git a/include/linux/hid.h b/include/linux/hid.h index 0322a913255..879a1fc2b01 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -482,6 +482,7 @@ struct hid_device { /* device report descriptor */ enum hid_type type; /* device type (mouse, kbd, ...) */ unsigned country; /* HID country */ struct hid_report_enum report_enum[HID_REPORT_TYPES]; + struct work_struct led_work; /* delayed LED worker */ struct semaphore driver_lock; /* protects the current driver */ struct device dev; /* device */ diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index f1362b5447f..c626de05f33 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -133,6 +133,7 @@ struct tcpvegas_info { }; #ifdef __KERNEL__ +struct net; struct sock; struct inet_hashinfo; struct nlattr; @@ -153,6 +154,10 @@ struct inet_diag_handler { void (*idiag_get_info)(struct sock *sk, struct inet_diag_msg *r, void *info); + + int (*destroy)(struct sk_buff *in_skb, + struct inet_diag_req_v2 *req); + __u16 idiag_type; }; @@ -168,6 +173,10 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb, const struct nlmsghdr *nlh, struct inet_diag_req_v2 *req); +struct sock *inet_diag_find_one_icsk(struct net *net, + struct inet_hashinfo *hashinfo, + struct inet_diag_req_v2 *req); + int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk); extern int inet_diag_register(const struct inet_diag_handler *handler); diff --git a/include/linux/input.h b/include/linux/input.h index 4618f8c4ef7..a7e71dfc7fa 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -204,6 +204,8 @@ struct input_keymap_entry { #define SYN_CONFIG 1 #define SYN_MT_REPORT 2 #define SYN_DROPPED 3 +#define SYN_TIME_SEC 4 +#define SYN_TIME_NSEC 5 /* * Keys and buttons @@ -470,10 +472,14 @@ struct input_keymap_entry { #define KEY_VIDEO_NEXT 241 /* drive next video source */ #define KEY_VIDEO_PREV 242 /* drive previous video source */ #define KEY_BRIGHTNESS_CYCLE 243 /* brightness up, after max is min */ -#define KEY_BRIGHTNESS_ZERO 244 /* brightness 
off, use ambient */ +#define KEY_BRIGHTNESS_AUTO 244 /* Set Auto Brightness: manual + brightness control is off, + rely on ambient */ +#define KEY_BRIGHTNESS_ZERO KEY_BRIGHTNESS_AUTO #define KEY_DISPLAY_OFF 245 /* display device to off state */ -#define KEY_WIMAX 246 +#define KEY_WWAN 246 /* Wireless WAN (LTE, UMTS, GSM, etc.) */ +#define KEY_WIMAX KEY_WWAN #define KEY_RFKILL 247 /* Key that controls all radios */ #define KEY_MICMUTE 248 /* Mute / unmute the microphone */ @@ -525,11 +531,15 @@ struct input_keymap_entry { #define BTN_DEAD 0x12f #define BTN_GAMEPAD 0x130 -#define BTN_A 0x130 -#define BTN_B 0x131 +#define BTN_SOUTH 0x130 +#define BTN_A BTN_SOUTH +#define BTN_EAST 0x131 +#define BTN_B BTN_EAST #define BTN_C 0x132 -#define BTN_X 0x133 -#define BTN_Y 0x134 +#define BTN_NORTH 0x133 +#define BTN_X BTN_NORTH +#define BTN_WEST 0x134 +#define BTN_Y BTN_WEST #define BTN_Z 0x135 #define BTN_TL 0x136 #define BTN_TR 0x137 @@ -643,6 +653,7 @@ struct input_keymap_entry { #define KEY_ADDRESSBOOK 0x1ad /* AL Contacts/Address Book */ #define KEY_MESSENGER 0x1ae /* AL Instant Messaging */ #define KEY_DISPLAYTOGGLE 0x1af /* Turn display (LCD) on and off */ +#define KEY_BRIGHTNESS_TOGGLE KEY_DISPLAYTOGGLE #define KEY_SPELLCHECK 0x1b0 /* AL Spell Check */ #define KEY_LOGOFF 0x1b1 /* AL Logoff */ @@ -735,6 +746,23 @@ struct input_keymap_entry { #if defined(CONFIG_SAMSUNG_JACK_VOICE_BTN) #define KEY_VOICECOMMAND 0x246 /* voice command*/ #endif +#define BTN_DPAD_UP 0x250 +#define BTN_DPAD_DOWN 0x251 +#define BTN_DPAD_LEFT 0x252 +#define BTN_DPAD_RIGHT 0x253 + +#define KEY_ALS_TOGGLE 0x260 /* Ambient light sensor */ + +#define KEY_BUTTONCONFIG 0x270 /* AL Button Configuration */ +#define KEY_TASKMANAGER 0x271 /* AL Task/Project Manager */ +#define KEY_JOURNAL 0x272 /* AL Log/Journal/Timecard */ +#define KEY_CONTROLPANEL 0x273 /* AL Control Panel */ +#define KEY_APPSELECT 0x274 /* AL Select Task/Application */ +#define KEY_SCREENSAVER 0x275 /* AL Screen Saver */ +#define KEY_VOICECOMMAND 0x276 /* Listening Voice Command */ + +#define KEY_BRIGHTNESS_MIN 0x280 /* Set Brightness to Minimum */ +#define KEY_BRIGHTNESS_MAX 0x281 /* Set Brightness to Maximum */ #define BTN_TRIGGER_HAPPY 0x2c0 #define BTN_TRIGGER_HAPPY1 0x2c0 @@ -916,6 +944,7 @@ struct input_keymap_entry { #define SW_RIGHT_HAND 0x18 /* set = right hand*/ #define SW_BOTH_HAND 0x19 /* set = both hand*/ #define SW_W1 0x1A /* set = w1_slave */ +#define SW_MUTE_DEVICE 0x1b /* set = device disabled */ #define SW_MAX 0x20 #define SW_CNT (SW_MAX+1) diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 7627f791876..8b2d7f4a8d1 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -166,6 +166,7 @@ struct ipv6_devconf { __s32 accept_source_route; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD __s32 optimistic_dad; + __s32 use_optimistic; #endif #ifdef CONFIG_IPV6_MROUTE __s32 mc_forwarding; @@ -175,6 +176,7 @@ struct ipv6_devconf { __s32 force_tllao; __s32 accept_ra_prefix_route; __s32 accept_ra_mtu; + __s32 use_oif_addrs_only; void *sysctl; }; @@ -219,6 +221,8 @@ enum { DEVCONF_ACCEPT_RA_PREFIX_ROUTE, DEVCONF_ACCEPT_RA_RT_TABLE, DEVCONF_ACCEPT_RA_MTU, + DEVCONF_USE_OPTIMISTIC, + DEVCONF_USE_OIF_ADDRS_ONLY, DEVCONF_MAX }; @@ -382,7 +386,7 @@ struct ipv6_pinfo { struct ipv6_ac_socklist *ipv6_ac_list; struct ipv6_fl_socklist *ipv6_fl_list; - struct ipv6_txoptions *opt; + struct ipv6_txoptions __rcu *opt; struct sk_buff *pktoptions; struct sk_buff *rxpmtu; struct { diff --git a/include/linux/mm.h b/include/linux/mm.h index 38033df2be3..767111b70f0 
100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1169,6 +1169,11 @@ static inline void update_hiwater_vm(struct mm_struct *mm) mm->hiwater_vm = mm->total_vm; } +static inline void reset_mm_hiwater_rss(struct mm_struct *mm) +{ + mm->hiwater_rss = get_mm_rss(mm); +} + static inline void setmax_mm_hiwater_rss(unsigned long *maxrss, struct mm_struct *mm) { diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h index 2e9d475e57f..12b2ab51032 100644 --- a/include/linux/mnt_namespace.h +++ b/include/linux/mnt_namespace.h @@ -7,7 +7,7 @@ struct fs_struct; struct user_namespace; extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *, - struct user_namespace *, struct fs_struct *); + struct user_namespace *, struct fs_struct *); extern void put_mnt_ns(struct mnt_namespace *ns); extern const struct file_operations proc_mounts_operations; diff --git a/include/linux/nmi.h b/include/linux/nmi.h index db50840e635..c8f8aa0383e 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -14,8 +14,11 @@ * may be used to reset the timeout - for code which intentionally * disables interrupts for a long time. This call is stateless. */ -#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) +#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR_NMI) #include +#endif + +#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) extern void touch_nmi_watchdog(void); #else static inline void touch_nmi_watchdog(void) diff --git a/include/linux/platform_data/ds2482.h b/include/linux/platform_data/ds2482.h new file mode 100644 index 00000000000..5a6879e2a09 --- /dev/null +++ b/include/linux/platform_data/ds2482.h @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2012 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __PLATFORM_DATA_DS2482__ +#define __PLATFORM_DATA_DS2482__ + +struct ds2482_platform_data { + int slpz_gpio; +}; + +#endif /* __PLATFORM_DATA_DS2482__ */ diff --git a/include/linux/pm.h b/include/linux/pm.h index 715305e0512..f067e60a383 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -544,8 +544,6 @@ struct dev_pm_info { unsigned long active_jiffies; unsigned long suspended_jiffies; unsigned long accounting_timestamp; - ktime_t suspend_time; - s64 max_time_suspended_ns; struct dev_pm_qos_request *pq_req; #endif struct pm_subsys_data *subsys_data; /* Owned by the subsystem. */ diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 91f8286106e..30f794eb382 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -14,6 +14,7 @@ #include #include #include +#include enum gpd_status { GPD_STATE_ACTIVE = 0, /* PM domain is active */ @@ -70,9 +71,9 @@ struct generic_pm_domain { int (*power_on)(struct generic_pm_domain *domain); s64 power_on_latency_ns; struct gpd_dev_ops dev_ops; - s64 break_even_ns; /* Power break even for the entire domain. */ s64 max_off_time_ns; /* Maximum allowed "suspended" time. 
*/ - ktime_t power_off_time; + bool max_off_time_changed; + bool cached_power_down_ok; struct device_node *of_node; /* Node in device tree */ }; @@ -93,13 +94,17 @@ struct gpd_timing_data { s64 start_latency_ns; s64 save_state_latency_ns; s64 restore_state_latency_ns; - s64 break_even_ns; + s64 effective_constraint_ns; + bool constraint_changed; + bool cached_stop_ok; }; struct generic_pm_domain_data { struct pm_domain_data base; struct gpd_dev_ops ops; struct gpd_timing_data td; + struct notifier_block nb; + struct mutex lock; bool need_restore; bool always_on; }; @@ -141,6 +146,7 @@ static inline int pm_genpd_of_add_device(struct device_node *genpd_node, extern int pm_genpd_remove_device(struct generic_pm_domain *genpd, struct device *dev); extern void pm_genpd_dev_always_on(struct device *dev, bool val); +extern void pm_genpd_dev_need_restore(struct device *dev, bool val); extern int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *new_subdomain); extern int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, @@ -184,6 +190,7 @@ static inline int pm_genpd_remove_device(struct generic_pm_domain *genpd, return -ENOSYS; } static inline void pm_genpd_dev_always_on(struct device *dev, bool val) {} +static inline void pm_genpd_dev_need_restore(struct device *dev, bool val) {} static inline int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *new_sd) { diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 609daae7a01..f271860c78d 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -150,9 +150,6 @@ static inline void pm_runtime_set_autosuspend_delay(struct device *dev, static inline unsigned long pm_runtime_autosuspend_expiration( struct device *dev) { return 0; } -static inline void pm_runtime_update_max_time_suspended(struct device *dev, - s64 delta_ns) {} - #endif /* !CONFIG_PM_RUNTIME */ static inline int pm_runtime_idle(struct device *dev) diff --git a/include/linux/power/smb347-charger.h b/include/linux/power/smb347-charger.h index b3cb20dab55..e9aab94437f 100644 --- a/include/linux/power/smb347-charger.h +++ b/include/linux/power/smb347-charger.h @@ -110,8 +110,14 @@ struct smb347_charger_platform_data { bool use_mains; bool use_usb; bool use_usb_otg; + bool disable_automatic_recharge; int irq_gpio; + bool disable_stat_interrupts; enum smb347_chg_enable enable_control; + bool usb_mode_pin_ctrl; + char **supplied_to; + size_t num_supplicants; + int en_gpio; }; #endif /* SMB347_CHARGER_H */ diff --git a/include/linux/random.h b/include/linux/random.h index 54b1fd3efdf..ef258255610 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -63,6 +63,7 @@ extern const struct file_operations random_fops, urandom_fops; #endif unsigned int get_random_int(void); +unsigned long get_random_long(void); unsigned long randomize_range(unsigned long start, unsigned long end, unsigned long len); u32 random32(void); diff --git a/include/linux/sched_clock.h b/include/linux/sched_clock.h index ec5b3d0e402..d31f17a8067 100644 --- a/include/linux/sched_clock.h +++ b/include/linux/sched_clock.h @@ -16,6 +16,7 @@ static inline void sched_clock_postinit(void) { } extern void sched_clock_register(u64 (*read)(void), int bits, unsigned long rate); +extern void setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate); #endif diff --git a/include/linux/swap.h b/include/linux/swap.h index 865e8ac1adf..06b2400eb38 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -487,6 
+487,11 @@ static inline void delete_from_swap_cache(struct page *page) { } +static inline int page_swapcount(struct page *page) +{ + return 0; +} + static inline int swp_swapcount(swp_entry_t entry) { return 0; diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h new file mode 100644 index 00000000000..1c97fa86c4a --- /dev/null +++ b/include/linux/vmpressure.h @@ -0,0 +1,47 @@ +#ifndef __LINUX_VMPRESSURE_H +#define __LINUX_VMPRESSURE_H + +#include +#include +#include +#include +#include +#include + +struct vmpressure { + unsigned long scanned; + unsigned long reclaimed; + /* The lock is used to keep the scanned/reclaimed above in sync. */ + struct mutex sr_lock; + + /* The list of vmpressure_event structs. */ + struct list_head events; + /* Have to grab the lock on events traversal or modifications. */ + struct mutex events_lock; + + struct work_struct work; +}; + +struct mem_cgroup; + +#ifdef CONFIG_CGROUP_MEM_RES_CTLR +extern void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, + unsigned long scanned, unsigned long reclaimed); +extern void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio); + +extern void vmpressure_init(struct vmpressure *vmpr); +extern struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg); +extern struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr); +extern struct vmpressure *css_to_vmpressure(struct cgroup_subsys_state *css); +extern int vmpressure_register_event(struct cgroup *cg, struct cftype *cft, + struct eventfd_ctx *eventfd, + const char *args); +extern void vmpressure_unregister_event(struct cgroup *cg, struct cftype *cft, + struct eventfd_ctx *eventfd); +#else +static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, + unsigned long scanned, unsigned long reclaimed) {} +static inline void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, + int prio) {} +#endif /* CONFIG_CGROUP_MEM_RES_CTLR */ +#endif /* __LINUX_VMPRESSURE_H */ diff --git a/include/net/addrconf.h b/include/net/addrconf.h index cdfd9275687..3badeaea42c 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -168,6 +168,9 @@ extern int ipv6_dev_ac_inc(struct net_device *dev, const struct in6_addr *addr); extern int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr); extern int ipv6_chk_acast_addr(struct net *net, struct net_device *dev, const struct in6_addr *addr); +extern void ipv6_ac_destroy_dev(struct inet6_dev *idev); + +u32 addrconf_rt_table(const struct net_device *dev, u32 default_table); u32 addrconf_rt_table(const struct net_device *dev, u32 default_table); extern void ipv6_ac_destroy_dev(struct inet6_dev *idev); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index f8c667a5115..b55d3466e27 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -203,6 +203,7 @@ extern rwlock_t ip6_ra_lock; */ struct ipv6_txoptions { + atomic_t refcnt; /* Length of this structure */ int tot_len; @@ -215,7 +216,7 @@ struct ipv6_txoptions { struct ipv6_opt_hdr *dst0opt; struct ipv6_rt_hdr *srcrt; /* Routing Header */ struct ipv6_opt_hdr *dst1opt; - + struct rcu_head rcu; /* Option buffer, as read by IPV6_PKTOPTIONS, starts here. 
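+ * (Freed via kfree_rcu() once the refcnt above drops to zero; see + * txopt_get()/txopt_put() below.)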
*/ }; @@ -241,6 +242,24 @@ struct ipv6_fl_socklist { struct ip6_flowlabel *fl; }; +static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np) +{ + struct ipv6_txoptions *opt; + + rcu_read_lock(); + opt = rcu_dereference(np->opt); + if (opt && !atomic_inc_not_zero(&opt->refcnt)) + opt = NULL; + rcu_read_unlock(); + return opt; +} + +static inline void txopt_put(struct ipv6_txoptions *opt) +{ + if (opt && atomic_dec_and_test(&opt->refcnt)) + kfree_rcu(opt, rcu); +} + extern struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label); extern struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions * opt_space, struct ip6_flowlabel * fl, diff --git a/include/net/tcp.h b/include/net/tcp.h index b1e3f58e3a1..19c27bcd233 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -264,6 +264,7 @@ extern int sysctl_tcp_default_init_rwnd; extern int sysctl_tcp_delack_seg; extern int sysctl_tcp_use_userconfig; extern int sysctl_tcp_challenge_ack_limit; +extern int sysctl_tcp_default_init_rwnd; /* sysctl variables for controlling various tcp parameters */ extern int sysctl_tcp_delack_seg; @@ -1007,6 +1008,8 @@ extern void tcp_set_state(struct sock *sk, int state); extern void tcp_done(struct sock *sk); +int tcp_abort(struct sock *sk, int err); + static inline void tcp_sack_reset(struct tcp_options_received *rx_opt) { rx_opt->dsack = 0; diff --git a/include/sound/soc.h b/include/sound/soc.h index 278ada3afee..b37f0b37d0b 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -332,6 +332,11 @@ enum snd_soc_dpcm_trigger { SND_SOC_DPCM_TRIGGER_BESPOKE, }; +enum snd_soc_card_subclass { + SND_SOC_CARD_CLASS_INIT = 0, + SND_SOC_CARD_CLASS_PCM = 1, +}; + int snd_soc_codec_set_sysclk(struct snd_soc_codec *codec, int clk_id, int source, unsigned int freq, int dir); int snd_soc_codec_set_pll(struct snd_soc_codec *codec, int pll_id, int source, diff --git a/include/trace/events/gpu.h b/include/trace/events/gpu.h new file mode 100644 index 00000000000..7e15cdfafe5 --- /dev/null +++ b/include/trace/events/gpu.h @@ -0,0 +1,143 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM gpu + +#if !defined(_TRACE_GPU_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_GPU_H + +#include +#include + +#define show_secs_from_ns(ns) \ + ({ \ + u64 t = ns + (NSEC_PER_USEC / 2); \ + do_div(t, NSEC_PER_SEC); \ + t; \ + }) + +#define show_usecs_from_ns(ns) \ + ({ \ + u64 t = ns + (NSEC_PER_USEC / 2) ; \ + u32 rem; \ + do_div(t, NSEC_PER_USEC); \ + rem = do_div(t, USEC_PER_SEC); \ + }) + +/* + * The gpu_sched_switch event indicates that a switch from one GPU context to + * another occurred on one of the GPU hardware blocks. + * + * The gpu_name argument identifies the GPU hardware block. Each independently + * scheduled GPU hardware block should have a different name. This may be used + * in different ways for different GPUs. For example, if a GPU includes + * multiple processing cores it may use names "GPU 0", "GPU 1", etc. If a GPU + * includes a separately scheduled 2D and 3D hardware block, it might use the + * names "2D" and "3D". + * + * The timestamp argument is the timestamp at which the switch occurred on the + * GPU. These timestamps are in units of nanoseconds and must use + * approximately the same time as sched_clock, though they need not come from + * any CPU clock. The timestamps for a single hardware block must be + * monotonically nondecreasing. 
This means that if a variable compensation + * offset is used to translate from some other clock to the sched_clock, then + * care must be taken when increasing that offset, and doing so may result in + * multiple events with the same timestamp. + * + * The next_ctx_id argument identifies the next context that was running on + * the GPU hardware block. A value of 0 indicates that the hardware block + * will be idle. + * + * The next_prio argument indicates the priority of the next context at the + * time of the event. The exact numeric values may mean different things for + * different GPUs, but they should follow the rule that lower values indicate a + * higher priority. + * + * The next_job_id argument identifies the batch of work that the GPU will be + * working on. This should correspond to a job_id that was previously traced + * as a gpu_job_enqueue event when the batch of work was created. + */ +TRACE_EVENT(gpu_sched_switch, + + TP_PROTO(const char *gpu_name, u64 timestamp, + u32 next_ctx_id, s32 next_prio, u32 next_job_id), + + TP_ARGS(gpu_name, timestamp, next_ctx_id, next_prio, next_job_id), + + TP_STRUCT__entry( + __string( gpu_name, gpu_name ) + __field( u64, timestamp ) + __field( u32, next_ctx_id ) + __field( s32, next_prio ) + __field( u32, next_job_id ) + ), + + TP_fast_assign( + __assign_str(gpu_name, gpu_name); + __entry->timestamp = timestamp; + __entry->next_ctx_id = next_ctx_id; + __entry->next_prio = next_prio; + __entry->next_job_id = next_job_id; + ), + + TP_printk("gpu_name=%s ts=%llu.%06lu next_ctx_id=%lu next_prio=%ld " + "next_job_id=%lu", + __get_str(gpu_name), + (unsigned long long)show_secs_from_ns(__entry->timestamp), + (unsigned long)show_usecs_from_ns(__entry->timestamp), + (unsigned long)__entry->next_ctx_id, + (long)__entry->next_prio, + (unsigned long)__entry->next_job_id) +); + +/* + * The gpu_job_enqueue event indicates that a batch of work has been queued up + * to be processed by the GPU. This event is not intended to indicate that + * the batch of work has been submitted to the GPU hardware, but rather that + * it has been submitted to the GPU kernel driver. + * + * This event should be traced on the thread that initiated the work being + * queued. For example, if a batch of work is submitted to the kernel by a + * userland thread, the event should be traced on that thread. + * + * The ctx_id field identifies the GPU context in which the batch of work + * being queued is to be run. + * + * The job_id field identifies the batch of work being queued within the given + * GPU context. The first batch of work submitted for a given GPU context + * should have a job_id of 0, and each subsequent batch of work should + * increment the job_id by 1. + * + * The type field identifies the type of the job being enqueued. The job + * types may be different for different GPU hardware. For example, a GPU may + * differentiate between "2D", "3D", and "compute" jobs. 
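+ * For illustration only: a driver making that distinction might call + * trace_gpu_job_enqueue(ctx_id, job_id, "3D") on the submitting thread; + * the "3D" string is a hypothetical type name, not part of any ABI.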
+ */ +TRACE_EVENT(gpu_job_enqueue, + + TP_PROTO(u32 ctx_id, u32 job_id, const char *type), + + TP_ARGS(ctx_id, job_id, type), + + TP_STRUCT__entry( + __field( u32, ctx_id ) + __field( u32, job_id ) + __string( type, type ) + ), + + TP_fast_assign( + __entry->ctx_id = ctx_id; + __entry->job_id = job_id; + __assign_str(type, type); + ), + + TP_printk("ctx_id=%lu job_id=%lu type=%s", + (unsigned long)__entry->ctx_id, + (unsigned long)__entry->job_id, + __get_str(type)) +); + +#undef show_secs_from_ns +#undef show_usecs_from_ns + +#endif /* _TRACE_GPU_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/events/mmc.h b/include/trace/events/mmc.h index 37115c4b5ac..5ca35ba8325 100644 --- a/include/trace/events/mmc.h +++ b/include/trace/events/mmc.h @@ -1,14 +1,15 @@ /* - * Copyright (c) 2013, The Linux Foundation. All rights reserved. + * Copyright (C) 2013 Google, Inc. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 and - * only version 2 as published by the Free Software Foundation. + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. + * */ #undef TRACE_SYSTEM @@ -18,6 +19,72 @@ #define _TRACE_MMC_H #include +#include +#include + +/* + * Unconditional logging of mmc block erase operations, + * including cmd, address, size + */ +DECLARE_EVENT_CLASS(mmc_blk_erase_class, + TP_PROTO(unsigned int cmd, unsigned int addr, unsigned int size), + TP_ARGS(cmd, addr, size), + TP_STRUCT__entry( + __field(unsigned int, cmd) + __field(unsigned int, addr) + __field(unsigned int, size) + ), + TP_fast_assign( + __entry->cmd = cmd; + __entry->addr = addr; + __entry->size = size; + ), + TP_printk("cmd=%u,addr=0x%08x,size=0x%08x", + __entry->cmd, __entry->addr, __entry->size) +); + +DEFINE_EVENT(mmc_blk_erase_class, mmc_blk_erase_start, + TP_PROTO(unsigned int cmd, unsigned int addr, unsigned int size), + TP_ARGS(cmd, addr, size)); + +DEFINE_EVENT(mmc_blk_erase_class, mmc_blk_erase_end, + TP_PROTO(unsigned int cmd, unsigned int addr, unsigned int size), + TP_ARGS(cmd, addr, size)); + +/* + * Logging of start of read or write mmc block operation, + * including cmd, address, size + */ +DECLARE_EVENT_CLASS(mmc_blk_rw_class, + TP_PROTO(unsigned int cmd, unsigned int addr, struct mmc_data *data), + TP_ARGS(cmd, addr, data), + TP_STRUCT__entry( + __field(unsigned int, cmd) + __field(unsigned int, addr) + __field(unsigned int, size) + ), + TP_fast_assign( + __entry->cmd = cmd; + __entry->addr = addr; + __entry->size = data->blocks; + ), + TP_printk("cmd=%u,addr=0x%08x,size=0x%08x", + __entry->cmd, __entry->addr, __entry->size) +); + +DEFINE_EVENT_CONDITION(mmc_blk_rw_class, mmc_blk_rw_start, + TP_PROTO(unsigned int cmd, unsigned int addr, struct mmc_data *data), + TP_ARGS(cmd, addr, data), + TP_CONDITION(((cmd == MMC_READ_MULTIPLE_BLOCK) || + (cmd == MMC_WRITE_MULTIPLE_BLOCK)) && + data)); + +DEFINE_EVENT_CONDITION(mmc_blk_rw_class, mmc_blk_rw_end, + TP_PROTO(unsigned int cmd, unsigned int addr, struct mmc_data *data), + TP_ARGS(cmd, addr, data), + TP_CONDITION(((cmd == MMC_READ_MULTIPLE_BLOCK) || + 
(cmd == MMC_WRITE_MULTIPLE_BLOCK)) && + data)); TRACE_EVENT(mmc_clk, TP_PROTO(char *print_info), @@ -36,8 +103,7 @@ TRACE_EVENT(mmc_clk, __get_str(print_info) ) ); - -#endif /* if !defined(_TRACE_MMC_H) || defined(TRACE_HEADER_MULTI_READ) */ +#endif /* _TRACE_MMC_H */ /* This part must be outside protection */ #include diff --git a/include/trace/events/net.h b/include/trace/events/net.h index f99645d05a8..47e845418e6 100644 --- a/include/trace/events/net.h +++ b/include/trace/events/net.h @@ -32,7 +32,7 @@ TRACE_EVENT(net_dev_xmit, __assign_str(name, dev->name); ), - TP_printk("dev=%s skbaddr=%p len=%u rc=%d", + TP_printk("dev=%s skbaddr=%pK len=%u rc=%d", __get_str(name), __entry->skbaddr, __entry->len, __entry->rc) ); @@ -54,7 +54,7 @@ DECLARE_EVENT_CLASS(net_dev_template, __assign_str(name, skb->dev->name); ), - TP_printk("dev=%s skbaddr=%p len=%u", + TP_printk("dev=%s skbaddr=%pK len=%u", __get_str(name), __entry->skbaddr, __entry->len) ) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 30767f41ccc..e7320ebddcc 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3963,23 +3963,23 @@ static int cgroup_clear_css_refs(struct cgroup *cgrp) return !failed; } -/* checks if all of the css_sets attached to a cgroup have a refcount of 0. - * Must be called with css_set_lock held */ +/* Checks if all of the css_sets attached to a cgroup have a refcount of 0. */ static int cgroup_css_sets_empty(struct cgroup *cgrp) { struct cg_cgroup_link *link; + int retval = 1; read_lock(&css_set_lock); list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) { struct css_set *cg = link->cg; if (cg && (atomic_read(&cg->refcount) > 0)) { - read_unlock(&css_set_lock); - return 0; + retval = 0; + break; } } - read_unlock(&css_set_lock); - return 1; + + return retval; } static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c index 0dbeae37422..36cc21da4dd 100644 --- a/kernel/exec_domain.c +++ b/kernel/exec_domain.c @@ -68,7 +68,14 @@ lookup_exec_domain(unsigned int personality) goto out; } -#ifdef CONFIG_MODULES +/* + * Disable the request_module here to avoid trying to + * load the personality-8 module, which doesn't exist, + * and results in selinux audit noise. + * Disabling this here avoids folks adding module_request + * to their sepolicy, which is maybe too generous + */ +#if 0 read_unlock(&exec_domains_lock); request_module("personality-%d", pers); read_lock(&exec_domains_lock); diff --git a/kernel/exit.c b/kernel/exit.c index 061269d5f04..6e0d7ca41a9 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1045,7 +1045,7 @@ void do_exit(long code) /* * Make sure we are holding no locks: */ - debug_check_no_locks_held(tsk); + debug_check_no_locks_held(); /* * We can do this unlocked here. The futex code uses this flag * just to verify whether the pi state cleanup has been done diff --git a/kernel/freezer.c b/kernel/freezer.c index 2f8ecd994d4..df6fc77a18e 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -113,6 +113,18 @@ bool freeze_task(struct task_struct *p) { unsigned long flags; + /* + * This check can race with freezer_do_not_count, but worst case that + * will result in an extra wakeup being sent to the task. It does not + * race with freezer_count(), the barriers in freezer_count() and + * freezer_should_skip() ensure that either freezer_count() sees + * freezing == true in try_to_freeze() and freezes, or + * freezer_should_skip() sees !PF_FREEZE_SKIP and freezes the task + * normally. 
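+ * For example (illustrative): a task sleeping inside a + * freezer_do_not_count()/freezer_count() section is skipped here and + * will freeze itself in freezer_count() once it wakes up.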
+ */ + if (freezer_should_skip(p)) + return false; + spin_lock_irqsave(&freezer_lock, flags); if (!freezing(p) || frozen(p)) { spin_unlock_irqrestore(&freezer_lock, flags); diff --git a/kernel/futex.c b/kernel/futex.c index a04158ce9c9..c2db763deb2 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -61,6 +61,7 @@ #include #include #include +#include #include @@ -1022,7 +1023,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) if (ret) { raw_spin_unlock(&pi_state->pi_mutex.wait_lock); return ret; - } + } raw_spin_lock_irq(&pi_state->owner->pi_lock); WARN_ON(list_empty(&pi_state->list)); @@ -1936,7 +1937,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, * is no timeout, or if it has yet to expire. */ if (!timeout || timeout->task) - schedule(); + freezable_schedule(); } __set_current_state(TASK_RUNNING); } diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 6a72395d314..07976c17fbb 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -45,6 +45,7 @@ #include #include #include +#include #include @@ -1564,7 +1565,7 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod t->task = NULL; if (likely(t->task)) - schedule(); + freezable_schedule(); hrtimer_cancel(&t->timer); mode = HRTIMER_MODE_ABS; diff --git a/kernel/lockdep.c b/kernel/lockdep.c index ea9ee4518c3..fdcf7ec6c88 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -4044,7 +4044,7 @@ void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len) } EXPORT_SYMBOL_GPL(debug_check_no_locks_freed); -static void print_held_locks_bug(struct task_struct *curr) +static void print_held_locks_bug(void) { if (!debug_locks_off()) return; @@ -4053,22 +4053,21 @@ static void print_held_locks_bug(struct task_struct *curr) printk("\n"); printk("=====================================\n"); - printk("[ BUG: lock held at task exit time! ]\n"); + printk("[ BUG: %s/%d still has locks held! 
]\n", + current->comm, task_pid_nr(current)); print_kernel_ident(); printk("-------------------------------------\n"); - printk("%s/%d is exiting with locks still held!\n", - curr->comm, task_pid_nr(curr)); - lockdep_print_held_locks(curr); - + lockdep_print_held_locks(current); printk("\nstack backtrace:\n"); dump_stack(); } -void debug_check_no_locks_held(struct task_struct *task) +void debug_check_no_locks_held(void) { - if (unlikely(task->lockdep_depth > 0)) - print_held_locks_bug(task); + if (unlikely(current->lockdep_depth > 0)) + print_held_locks_bug(); } +EXPORT_SYMBOL_GPL(debug_check_no_locks_held); void debug_show_all_locks(void) { diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 62bd9adeab8..1051a06c12f 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -28,6 +28,14 @@ config CPU_FREQ_LIMIT_USERSPACE /sys/power/cpufreq_max_limit /sys/power/cpufreq_table +config HAS_WAKELOCK + bool + default y + +config WAKELOCK + bool + default y + config HIBERNATE_CALLBACKS bool diff --git a/kernel/power/Makefile b/kernel/power/Makefile index ab4a51492aa..74c713ba61b 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -11,6 +11,8 @@ obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \ block_io.o obj-$(CONFIG_PM_AUTOSLEEP) += autosleep.o obj-$(CONFIG_PM_WAKELOCKS) += wakelock.o +obj-$(CONFIG_SUSPEND_TIME) += suspend_time.o obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o + obj-$(CONFIG_SUSPEND) += wakeup_reason.o diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index dd875cbe0d1..cacf697aebd 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -25,6 +25,8 @@ #include #include #include +#include +#include #include #include "power.h" @@ -734,6 +736,17 @@ static int software_resume(void) /* Check if the device is there */ swsusp_resume_device = name_to_dev_t(resume_file); + + /* + * name_to_dev_t is ineffective to verify parition if resume_file is in + * integer format. (e.g. 
major:minor) + */ + if (isdigit(resume_file[0]) && resume_wait) { + int partno; + while (!get_gendisk(swsusp_resume_device, &partno)) + msleep(10); + } + if (!swsusp_resume_device) { /* * Some device discovery might still be in progress; we need diff --git a/kernel/power/main.c b/kernel/power/main.c index c7d19527592..06f1aea09b1 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -96,64 +96,6 @@ static ssize_t pm_async_store(struct kobject *kobj, struct kobj_attribute *attr, power_attr(pm_async); -static ssize_t -touch_event_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - if (tc_ev_processed == 0) - return snprintf(buf, strnlen("touch_event", MAX_BUF) + 1, - "touch_event"); - else - return snprintf(buf, strnlen("null", MAX_BUF) + 1, - "null"); -} - -static ssize_t -touch_event_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t n) -{ - - hrtimer_cancel(&tc_ev_timer); - tc_ev_processed = 0; - - /* set a timer to notify the userspace to stop processing - * touch event - */ - hrtimer_start(&tc_ev_timer, touch_evt_timer_val, HRTIMER_MODE_REL); - - /* wakeup the userspace poll */ - sysfs_notify(kobj, NULL, "touch_event"); - - return n; -} - -power_attr(touch_event); - -static ssize_t -touch_event_timer_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - return snprintf(buf, MAX_BUF, "%lld", touch_evt_timer_val.tv64); -} - -static ssize_t -touch_event_timer_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t n) -{ - unsigned long val; - - if (strict_strtoul(buf, 10, &val)) - return -EINVAL; - - touch_evt_timer_val = ktime_set(0, val*1000); - - return n; -} - -power_attr(touch_event_timer); - static void touch_event_fn(struct work_struct *work) { /* wakeup the userspace poll */ @@ -969,10 +911,6 @@ power_attr(pm_trace_dev_match); #endif /* CONFIG_PM_TRACE */ -#ifdef CONFIG_USER_WAKELOCK -power_attr(wake_lock); -power_attr(wake_unlock); -#endif #ifdef CONFIG_SEC_DVFS DEFINE_MUTEX(dvfs_mutex); static unsigned long dvfs_id; @@ -1183,7 +1121,7 @@ power_attr(cpufreq_max_limit); power_attr(cpufreq_min_limit); power_attr(cpufreq_table); #endif -static struct attribute *g[] = { +static struct attribute * g[] = { &state_attr.attr, #ifdef CONFIG_PM_TRACE &pm_trace_attr.attr, @@ -1199,15 +1137,9 @@ static struct attribute *g[] = { &wake_lock_attr.attr, &wake_unlock_attr.attr, #endif - &touch_event_attr.attr, - &touch_event_timer_attr.attr, #ifdef CONFIG_PM_DEBUG &pm_test_attr.attr, #endif -#ifdef CONFIG_USER_WAKELOCK - &wake_lock_attr.attr, - &wake_unlock_attr.attr, -#endif #endif #ifdef CONFIG_CPU_FREQ_LIMIT_USERSPACE &cpufreq_table_attr.attr, diff --git a/kernel/power/process.c b/kernel/power/process.c index 2c46786eb00..7da22beae14 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -33,8 +33,8 @@ static int try_to_freeze_tasks(bool user_only) unsigned int todo; bool wq_busy = false; struct timeval start, end; - u64 elapsed_csecs64; - unsigned int elapsed_csecs; + u64 elapsed_msecs64; + unsigned int elapsed_msecs; bool wakeup = false; int sleep_usecs = USEC_PER_MSEC; @@ -94,9 +94,9 @@ static int try_to_freeze_tasks(bool user_only) } do_gettimeofday(&end); - elapsed_csecs64 = timeval_to_ns(&end) - timeval_to_ns(&start); - do_div(elapsed_csecs64, NSEC_PER_SEC / 100); - elapsed_csecs = elapsed_csecs64; + elapsed_msecs64 = timeval_to_ns(&end) - timeval_to_ns(&start); + do_div(elapsed_msecs64, NSEC_PER_MSEC); + elapsed_msecs = elapsed_msecs64; if (todo) { /* This does 
not unfreeze processes that are already frozen @@ -115,7 +115,7 @@ static int try_to_freeze_tasks(bool user_only) printk(KERN_ERR "Freezing of tasks %s after %d.%02d seconds " "(%d tasks refusing to freeze, wq_busy=%d):\n", wakeup ? "aborted" : "failed", - elapsed_csecs / 100, elapsed_csecs % 100, + elapsed_msecs / 1000, elapsed_msecs % 1000, todo - wq_busy, wq_busy); } @@ -124,14 +124,14 @@ static int try_to_freeze_tasks(bool user_only) do_each_thread(g, p) { if (p != current && !freezer_should_skip(p) && freezing(p) && !frozen(p) && - elapsed_csecs > 100) + elapsed_msecs > 1000) sched_show_task(p); } while_each_thread(g, p); read_unlock(&tasklist_lock); } } else { - printk("(elapsed %d.%02d seconds) ", elapsed_csecs / 100, - elapsed_csecs % 100); + printk("(elapsed %d.%03d seconds) ", elapsed_msecs / 1000, + elapsed_msecs % 1000); } return todo ? -EBUSY : 0; diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index b850adc2543..aabcb6de4e1 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -177,6 +177,8 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) if (!(suspend_test(TEST_CORE) || *wakeup)) { error = suspend_ops->enter(state); events_check_enabled = false; + } else if (*wakeup) { + error = -EBUSY; } syscore_resume(); } diff --git a/kernel/power/userwakelock.c b/kernel/power/userwakelock.c deleted file mode 100644 index a28a8db4146..00000000000 --- a/kernel/power/userwakelock.c +++ /dev/null @@ -1,219 +0,0 @@ -/* kernel/power/userwakelock.c - * - * Copyright (C) 2005-2008 Google, Inc. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - */ - -#include -#include -#include -#include - -#include "power.h" - -enum { - DEBUG_FAILURE = BIT(0), - DEBUG_ERROR = BIT(1), - DEBUG_NEW = BIT(2), - DEBUG_ACCESS = BIT(3), - DEBUG_LOOKUP = BIT(4), -}; -static int debug_mask = DEBUG_FAILURE; -module_param_named(debug_mask, debug_mask, int, S_IRUGO | S_IWUSR | S_IWGRP); - -static DEFINE_MUTEX(tree_lock); - -struct user_wake_lock { - struct rb_node node; - struct wake_lock wake_lock; - char name[0]; -}; -struct rb_root user_wake_locks; - -static struct user_wake_lock *lookup_wake_lock_name( - const char *buf, int allocate, long *timeoutptr) -{ - struct rb_node **p = &user_wake_locks.rb_node; - struct rb_node *parent = NULL; - struct user_wake_lock *l; - int diff; - u64 timeout; - int name_len; - const char *arg; - - /* Find length of lock name and start of optional timeout string */ - arg = buf; - while (*arg && !isspace(*arg)) - arg++; - name_len = arg - buf; - if (!name_len) - goto bad_arg; - while (isspace(*arg)) - arg++; - - /* Process timeout string */ - if (timeoutptr && *arg) { - timeout = simple_strtoull(arg, (char **)&arg, 0); - while (isspace(*arg)) - arg++; - if (*arg) - goto bad_arg; - /* convert timeout from nanoseconds to jiffies > 0 */ - timeout += (NSEC_PER_SEC / HZ) - 1; - do_div(timeout, (NSEC_PER_SEC / HZ)); - if (timeout <= 0) - timeout = 1; - *timeoutptr = timeout; - } else if (*arg) - goto bad_arg; - else if (timeoutptr) - *timeoutptr = 0; - - /* Lookup wake lock in rbtree */ - while (*p) { - parent = *p; - l = rb_entry(parent, struct user_wake_lock, node); - diff = strncmp(buf, l->name, name_len); - if (!diff && l->name[name_len]) - diff = -1; - if (debug_mask & DEBUG_ERROR) - pr_info("lookup_wake_lock_name: compare %.*s %s %d\n", - name_len, buf, l->name, diff); - - if (diff < 0) - p = &(*p)->rb_left; - else if (diff > 0) - p = &(*p)->rb_right; - else - return l; - } - - /* Allocate and add new wakelock to rbtree */ - if (!allocate) { - if (debug_mask & DEBUG_ERROR) - pr_info("lookup_wake_lock_name: %.*s not found\n", - name_len, buf); - return ERR_PTR(-EINVAL); - } - l = kzalloc(sizeof(*l) + name_len + 1, GFP_KERNEL); - if (l == NULL) { - if (debug_mask & DEBUG_FAILURE) - pr_err("lookup_wake_lock_name: failed to allocate " - "memory for %.*s\n", name_len, buf); - return ERR_PTR(-ENOMEM); - } - memcpy(l->name, buf, name_len); - if (debug_mask & DEBUG_NEW) - pr_info("lookup_wake_lock_name: new wake lock %s\n", l->name); - wake_lock_init(&l->wake_lock, WAKE_LOCK_SUSPEND, l->name); - rb_link_node(&l->node, parent, p); - rb_insert_color(&l->node, &user_wake_locks); - return l; - -bad_arg: - if (debug_mask & DEBUG_ERROR) - pr_info("lookup_wake_lock_name: wake lock, %.*s, bad arg, %s\n", - name_len, buf, arg); - return ERR_PTR(-EINVAL); -} - -ssize_t wake_lock_show( - struct kobject *kobj, struct kobj_attribute *attr, char *buf) -{ - char *s = buf; - char *end = buf + PAGE_SIZE; - struct rb_node *n; - struct user_wake_lock *l; - - mutex_lock(&tree_lock); - - for (n = rb_first(&user_wake_locks); n != NULL; n = rb_next(n)) { - l = rb_entry(n, struct user_wake_lock, node); - if (wake_lock_active(&l->wake_lock)) - s += scnprintf(s, end - s, "%s ", l->name); - } - s += scnprintf(s, end - s, "\n"); - - mutex_unlock(&tree_lock); - return (s - buf); -} - -ssize_t wake_lock_store( - struct kobject *kobj, struct kobj_attribute *attr, - const char *buf, size_t n) -{ - long timeout; - struct user_wake_lock *l; - - mutex_lock(&tree_lock); - l = lookup_wake_lock_name(buf, 1, &timeout); - if (IS_ERR(l)) { - n = PTR_ERR(l); - goto 
bad_name; - } - - if (debug_mask & DEBUG_ACCESS) - pr_info("wake_lock_store: %s, timeout %ld\n", l->name, timeout); - - if (timeout) - wake_lock_timeout(&l->wake_lock, timeout); - else - wake_lock(&l->wake_lock); -bad_name: - mutex_unlock(&tree_lock); - return n; -} - - -ssize_t wake_unlock_show( - struct kobject *kobj, struct kobj_attribute *attr, char *buf) -{ - char *s = buf; - char *end = buf + PAGE_SIZE; - struct rb_node *n; - struct user_wake_lock *l; - - mutex_lock(&tree_lock); - - for (n = rb_first(&user_wake_locks); n != NULL; n = rb_next(n)) { - l = rb_entry(n, struct user_wake_lock, node); - if (!wake_lock_active(&l->wake_lock)) - s += scnprintf(s, end - s, "%s ", l->name); - } - s += scnprintf(s, end - s, "\n"); - - mutex_unlock(&tree_lock); - return (s - buf); -} - -ssize_t wake_unlock_store( - struct kobject *kobj, struct kobj_attribute *attr, - const char *buf, size_t n) -{ - struct user_wake_lock *l; - - mutex_lock(&tree_lock); - l = lookup_wake_lock_name(buf, 0, NULL); - if (IS_ERR(l)) { - n = PTR_ERR(l); - goto not_found; - } - - if (debug_mask & DEBUG_ACCESS) - pr_info("wake_unlock_store: %s\n", l->name); - - wake_unlock(&l->wake_lock); -not_found: - mutex_unlock(&tree_lock); - return n; -} - diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c index 63699628329..0095af24acd 100644 --- a/kernel/power/wakeup_reason.c +++ b/kernel/power/wakeup_reason.c @@ -27,19 +27,20 @@ #include #include + #define MAX_WAKEUP_REASON_IRQS 32 static int irq_list[MAX_WAKEUP_REASON_IRQS]; -static int irqcount; +static int irq_count; static struct kobject *wakeup_reason; static spinlock_t resume_reason_lock; -static ssize_t reason_show(struct kobject *kobj, struct kobj_attribute *attr, +static ssize_t last_resume_reason_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { int irq_no, buf_offset = 0; struct irq_desc *desc; spin_lock(&resume_reason_lock); - for (irq_no = 0; irq_no < irqcount; irq_no++) { + for (irq_no = 0; irq_no < irq_count; irq_no++) { desc = irq_to_desc(irq_list[irq_no]); if (desc && desc->action && desc->action->name) buf_offset += sprintf(buf + buf_offset, "%d %s\n", @@ -52,8 +53,7 @@ static ssize_t reason_show(struct kobject *kobj, struct kobj_attribute *attr, return buf_offset; } -static struct kobj_attribute resume_reason = __ATTR(last_resume_reason, 0444, - reason_show, NULL); +static struct kobj_attribute resume_reason = __ATTR_RO(last_resume_reason); static struct attribute *attrs[] = { &resume_reason.attr, @@ -78,14 +78,14 @@ void log_wakeup_reason(int irq) printk(KERN_INFO "Resume caused by IRQ %d\n", irq); spin_lock(&resume_reason_lock); - if (irqcount == MAX_WAKEUP_REASON_IRQS) { + if (irq_count == MAX_WAKEUP_REASON_IRQS) { spin_unlock(&resume_reason_lock); printk(KERN_WARNING "Resume caused by more than %d IRQs\n", MAX_WAKEUP_REASON_IRQS); return; } - irq_list[irqcount++] = irq; + irq_list[irq_count++] = irq; spin_unlock(&resume_reason_lock); } @@ -96,7 +96,7 @@ static int wakeup_reason_pm_event(struct notifier_block *notifier, switch (pm_event) { case PM_SUSPEND_PREPARE: spin_lock(&resume_reason_lock); - irqcount = 0; + irq_count = 0; spin_unlock(&resume_reason_lock); break; default: diff --git a/kernel/signal.c b/kernel/signal.c index c5c70c34632..83ebcc531ef 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2212,7 +2212,7 @@ relock: * Now that we woke up, it's crucial if we're supposed to be * frozen that we freeze now before running anything substantial. 
*/ - try_to_freeze(); + try_to_freeze_nowarn(); spin_lock_irq(&sighand->siglock); /* @@ -2772,7 +2772,7 @@ int do_sigtimedwait(const sigset_t *which, siginfo_t *info, recalc_sigpending(); spin_unlock_irq(&tsk->sighand->siglock); - timeout = schedule_timeout_interruptible(timeout); + timeout = freezable_schedule_timeout_interruptible(timeout); spin_lock_irq(&tsk->sighand->siglock); __set_task_blocked(tsk, &tsk->real_blocked); diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index da67ba27e2a..5d2ba2ac3b8 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -37,7 +37,6 @@ static struct alarm_base { spinlock_t lock; struct timerqueue_head timerqueue; - struct hrtimer timer; ktime_t (*gettime)(void); clockid_t base_clockid; } alarm_bases[ALARM_NUMTYPE]; @@ -46,6 +45,8 @@ static struct alarm_base { static ktime_t freezer_delta; static DEFINE_SPINLOCK(freezer_delta_lock); +static struct wakeup_source *ws; + #ifdef CONFIG_RTC_CLASS /* rtc timer and device for setting alarm wakeups at suspend */ static struct rtc_timer rtctimer; @@ -133,21 +134,17 @@ void set_power_on_alarm(long secs, bool enable) { } * @base: pointer to the base where the timer is being run * @alarm: pointer to alarm being enqueued. * - * Adds alarm to a alarm_base timerqueue and if necessary sets - * an hrtimer to run. + * Adds alarm to a alarm_base timerqueue * * Must hold base->lock when calling. */ static void alarmtimer_enqueue(struct alarm_base *base, struct alarm *alarm) { + if (alarm->state & ALARMTIMER_STATE_ENQUEUED) + timerqueue_del(&base->timerqueue, &alarm->node); + timerqueue_add(&base->timerqueue, &alarm->node); alarm->state |= ALARMTIMER_STATE_ENQUEUED; - - if (&alarm->node == timerqueue_getnext(&base->timerqueue)) { - hrtimer_try_to_cancel(&base->timer); - hrtimer_start(&base->timer, alarm->node.expires, - HRTIMER_MODE_ABS); - } } /** @@ -155,28 +152,17 @@ static void alarmtimer_enqueue(struct alarm_base *base, struct alarm *alarm) * @base: pointer to the base where the timer is running * @alarm: pointer to alarm being removed * - * Removes alarm to a alarm_base timerqueue and if necessary sets - * a new timer to run. + * Removes alarm to a alarm_base timerqueue * * Must hold base->lock when calling. 
*/ static void alarmtimer_remove(struct alarm_base *base, struct alarm *alarm) { - struct timerqueue_node *next = timerqueue_getnext(&base->timerqueue); - if (!(alarm->state & ALARMTIMER_STATE_ENQUEUED)) return; timerqueue_del(&base->timerqueue, &alarm->node); alarm->state &= ~ALARMTIMER_STATE_ENQUEUED; - - if (next == &alarm->node) { - hrtimer_try_to_cancel(&base->timer); - next = timerqueue_getnext(&base->timerqueue); - if (!next) - return; - hrtimer_start(&base->timer, next->expires, HRTIMER_MODE_ABS); - } } @@ -191,42 +177,23 @@ static void alarmtimer_remove(struct alarm_base *base, struct alarm *alarm) */ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) { - struct alarm_base *base = container_of(timer, struct alarm_base, timer); - struct timerqueue_node *next; + struct alarm *alarm = container_of(timer, struct alarm, timer); + struct alarm_base *base = &alarm_bases[alarm->type]; unsigned long flags; - ktime_t now; int ret = HRTIMER_NORESTART; int restart = ALARMTIMER_NORESTART; spin_lock_irqsave(&base->lock, flags); - now = base->gettime(); - while ((next = timerqueue_getnext(&base->timerqueue))) { - struct alarm *alarm; - ktime_t expired = next->expires; + alarmtimer_remove(base, alarm); + spin_unlock_irqrestore(&base->lock, flags); - if (expired.tv64 > now.tv64) - break; + if (alarm->function) + restart = alarm->function(alarm, base->gettime()); - alarm = container_of(next, struct alarm, node); - - timerqueue_del(&base->timerqueue, &alarm->node); - alarm->state &= ~ALARMTIMER_STATE_ENQUEUED; - - alarm->state |= ALARMTIMER_STATE_CALLBACK; - spin_unlock_irqrestore(&base->lock, flags); - if (alarm->function) - restart = alarm->function(alarm, now); - spin_lock_irqsave(&base->lock, flags); - alarm->state &= ~ALARMTIMER_STATE_CALLBACK; - - if (restart != ALARMTIMER_NORESTART) { - timerqueue_add(&base->timerqueue, &alarm->node); - alarm->state |= ALARMTIMER_STATE_ENQUEUED; - } - } - - if (next) { - hrtimer_set_expires(&base->timer, next->expires); + spin_lock_irqsave(&base->lock, flags); + if (restart != ALARMTIMER_NORESTART) { + hrtimer_set_expires(&alarm->timer, alarm->node.expires); + alarmtimer_enqueue(base, alarm); ret = HRTIMER_RESTART; } spin_unlock_irqrestore(&base->lock, flags); @@ -259,6 +226,7 @@ static int alarmtimer_suspend(struct device *dev) unsigned long flags; struct rtc_device *rtc; int i; + int ret; spin_lock_irqsave(&freezer_delta_lock, flags); min = freezer_delta; @@ -288,8 +256,10 @@ static int alarmtimer_suspend(struct device *dev) if (min.tv64 == 0) return 0; - /* XXX - Should we enforce a minimum sleep time? 
*/ - WARN_ON(min.tv64 < NSEC_PER_SEC); + if (ktime_to_ns(min) < 2 * NSEC_PER_SEC) { + __pm_wakeup_event(ws, 2 * MSEC_PER_SEC); + return -EBUSY; + } /* Setup an rtc timer to fire that far in the future */ rtc_timer_cancel(rtc, &rtctimer); @@ -297,9 +267,11 @@ static int alarmtimer_suspend(struct device *dev) now = rtc_tm_to_ktime(tm); now = ktime_add(now, min); - rtc_timer_start(rtc, &rtctimer, now, ktime_set(0, 0)); - - return 0; + /* Set alarm, if in the past reject suspend briefly to handle */ + ret = rtc_timer_start(rtc, &rtctimer, now, ktime_set(0, 0)); + if (ret < 0) + __pm_wakeup_event(ws, 1 * MSEC_PER_SEC); + return ret; } #else static int alarmtimer_suspend(struct device *dev) @@ -333,25 +305,55 @@ void alarm_init(struct alarm *alarm, enum alarmtimer_type type, enum alarmtimer_restart (*function)(struct alarm *, ktime_t)) { timerqueue_init(&alarm->node); + hrtimer_init(&alarm->timer, alarm_bases[type].base_clockid, + HRTIMER_MODE_ABS); + alarm->timer.function = alarmtimer_fired; alarm->function = function; alarm->type = type; alarm->state = ALARMTIMER_STATE_INACTIVE; } /** - * alarm_start - Sets an alarm to fire + * alarm_start - Sets an absolute alarm to fire * @alarm: ptr to alarm to set * @start: time to run the alarm */ -void alarm_start(struct alarm *alarm, ktime_t start) +int alarm_start(struct alarm *alarm, ktime_t start) +{ + struct alarm_base *base = &alarm_bases[alarm->type]; + unsigned long flags; + int ret; + + spin_lock_irqsave(&base->lock, flags); + alarm->node.expires = start; + alarmtimer_enqueue(base, alarm); + ret = hrtimer_start(&alarm->timer, alarm->node.expires, + HRTIMER_MODE_ABS); + spin_unlock_irqrestore(&base->lock, flags); + return ret; +} + +/** + * alarm_start_relative - Sets a relative alarm to fire + * @alarm: ptr to alarm to set + * @start: time relative to now to run the alarm + */ +int alarm_start_relative(struct alarm *alarm, ktime_t start) +{ + struct alarm_base *base = &alarm_bases[alarm->type]; + + start = ktime_add(start, base->gettime()); + return alarm_start(alarm, start); +} + +void alarm_restart(struct alarm *alarm) { struct alarm_base *base = &alarm_bases[alarm->type]; unsigned long flags; spin_lock_irqsave(&base->lock, flags); - if (alarmtimer_active(alarm)) - alarmtimer_remove(base, alarm); - alarm->node.expires = start; + hrtimer_set_expires(&alarm->timer, alarm->node.expires); + hrtimer_restart(&alarm->timer); alarmtimer_enqueue(base, alarm); spin_unlock_irqrestore(&base->lock, flags); } @@ -367,18 +369,12 @@ int alarm_try_to_cancel(struct alarm *alarm) { struct alarm_base *base = &alarm_bases[alarm->type]; unsigned long flags; - int ret = -1; + int ret; + spin_lock_irqsave(&base->lock, flags); - - if (alarmtimer_callback_running(alarm)) - goto out; - - if (alarmtimer_is_queued(alarm)) { + ret = hrtimer_try_to_cancel(&alarm->timer); + if (ret >= 0) alarmtimer_remove(base, alarm); - ret = 1; - } else - ret = 0; -out: spin_unlock_irqrestore(&base->lock, flags); return ret; } @@ -432,6 +428,12 @@ u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval) return overrun; } +u64 alarm_forward_now(struct alarm *alarm, ktime_t interval) +{ + struct alarm_base *base = &alarm_bases[alarm->type]; + + return alarm_forward(alarm, base->gettime(), interval); +} @@ -839,10 +841,6 @@ static int __init alarmtimer_init(void) for (i = 0; i < ALARM_NUMTYPE; i++) { timerqueue_init_head(&alarm_bases[i].timerqueue); spin_lock_init(&alarm_bases[i].lock); - hrtimer_init(&alarm_bases[i].timer, - alarm_bases[i].base_clockid, - HRTIMER_MODE_ABS); - 
alarm_bases[i].timer.function = alarmtimer_fired; } error = alarmtimer_rtc_interface_setup(); @@ -858,6 +856,7 @@ static int __init alarmtimer_init(void) error = PTR_ERR(pdev); goto out_drv; } + ws = wakeup_source_register("alarmtimer"); return 0; out_drv: diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 12bda95b25c..bbb6ced784e 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -69,6 +69,9 @@ config EVENT_TRACING select CONTEXT_SWITCH_TRACER bool +config GPU_TRACEPOINTS + bool + config EVENT_POWER_TRACING_DEPRECATED depends on EVENT_TRACING bool "Deprecated power event trace API, to be removed" diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 3c13931517b..797830a655a 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -62,5 +62,6 @@ endif ifeq ($(CONFIG_TRACING),y) obj-$(CONFIG_KGDB_KDB) += trace_kdb.o endif +obj-$(CONFIG_GPU_TRACEPOINTS) += gpu-traces.o libftrace-y := ftrace.o diff --git a/kernel/trace/gpu-traces.c b/kernel/trace/gpu-traces.c new file mode 100644 index 00000000000..a4b3f00faee --- /dev/null +++ b/kernel/trace/gpu-traces.c @@ -0,0 +1,23 @@ +/* + * GPU tracepoints + * + * Copyright (C) 2013 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include + +#define CREATE_TRACE_POINTS +#include + +EXPORT_TRACEPOINT_SYMBOL(gpu_sched_switch); +EXPORT_TRACEPOINT_SYMBOL(gpu_job_enqueue); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index fdfa23de17e..7c6b9c9bd9d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -483,6 +483,7 @@ static const char *trace_options[] = { "overwrite", "disable_on_free", "irq-info", + "print-tgid", NULL }; @@ -983,6 +984,7 @@ void tracing_reset_current_online_cpus(void) static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1]; static unsigned map_cmdline_to_pid[SAVED_CMDLINES]; static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN]; +static unsigned saved_tgids[SAVED_CMDLINES]; static int cmdline_idx; static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED; @@ -1128,6 +1130,7 @@ static void trace_save_cmdline(struct task_struct *tsk) } memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN); + saved_tgids[idx] = tsk->tgid; arch_spin_unlock(&trace_cmdline_lock); } @@ -1163,6 +1166,25 @@ void trace_find_cmdline(int pid, char comm[]) preempt_enable(); } +int trace_find_tgid(int pid) +{ + unsigned map; + int tgid; + + preempt_disable(); + arch_spin_lock(&trace_cmdline_lock); + map = map_pid_to_cmdline[pid]; + if (map != NO_CMDLINE_MAP) + tgid = saved_tgids[map]; + else + tgid = -1; + + arch_spin_unlock(&trace_cmdline_lock); + preempt_enable(); + + return tgid; +} + void tracing_record_cmdline(struct task_struct *tsk) { if (atomic_read(&trace_record_cmdline_disabled) || !tracer_enabled || @@ -1972,6 +1994,13 @@ static void print_func_help_header(struct trace_array *tr, struct seq_file *m) seq_puts(m, "# | | | | |\n"); } +static void print_func_help_header_tgid(struct trace_array *tr, struct seq_file *m) +{ + print_event_info(tr, m); + seq_puts(m, "# TASK-PID TGID CPU# TIMESTAMP FUNCTION\n"); + seq_puts(m, "# | | | | | |\n"); +} + 
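For reference, a minimal sketch of how a driver could emit the tracepoints declared in include/trace/events/gpu.h above; every function name below is hypothetical, and "GPU 0"/"3D" are placeholder strings, not values required by this patch:

    #include <trace/events/gpu.h>

    /* Trace on the thread that submits the batch to the kernel driver. */
    static void example_queue_job(u32 ctx_id, u32 job_id)
    {
            trace_gpu_job_enqueue(ctx_id, job_id, "3D");
    }

    /*
     * Trace a hardware context switch; hw_ts_ns must already be
     * translated to the sched_clock time base, as the comment on
     * gpu_sched_switch requires.
     */
    static void example_hw_switch(u64 hw_ts_ns, u32 next_ctx_id,
                                  s32 next_prio, u32 next_job_id)
    {
            trace_gpu_sched_switch("GPU 0", hw_ts_ns, next_ctx_id,
                                   next_prio, next_job_id);
    }

A driver wiring this up would also select CONFIG_GPU_TRACEPOINTS, which builds the gpu-traces.o shown above and exports the tracepoint symbols.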
static void print_func_help_header_irq(struct trace_array *tr, struct seq_file *m) { print_event_info(tr, m); @@ -1984,6 +2013,18 @@ static void print_func_help_header_irq(struct trace_array *tr, struct seq_file * seq_puts(m, "# | | | |||| | |\n"); } +static void print_func_help_header_irq_tgid(struct trace_array *tr, struct seq_file *m) +{ + print_event_info(tr, m); + seq_puts(m, "# _-----=> irqs-off\n"); + seq_puts(m, "# / _----=> need-resched\n"); + seq_puts(m, "# | / _---=> hardirq/softirq\n"); + seq_puts(m, "# || / _--=> preempt-depth\n"); + seq_puts(m, "# ||| / delay\n"); + seq_puts(m, "# TASK-PID TGID CPU# |||| TIMESTAMP FUNCTION\n"); + seq_puts(m, "# | | | | |||| | |\n"); +} + void print_trace_header(struct seq_file *m, struct trace_iterator *iter) { @@ -2276,9 +2317,15 @@ void trace_default_header(struct seq_file *m) } else { if (!(trace_flags & TRACE_ITER_VERBOSE)) { if (trace_flags & TRACE_ITER_IRQ_INFO) - print_func_help_header_irq(iter->tr, m); + if (trace_flags & TRACE_ITER_TGID) + print_func_help_header_irq_tgid(iter->tr, m); + else + print_func_help_header_irq(iter->tr, m); else - print_func_help_header(iter->tr, m); + if (trace_flags & TRACE_ITER_TGID) + print_func_help_header_tgid(iter->tr, m); + else + print_func_help_header(iter->tr, m); } } } @@ -2932,9 +2979,53 @@ tracing_saved_cmdlines_read(struct file *file, char __user *ubuf, } static const struct file_operations tracing_saved_cmdlines_fops = { - .open = tracing_open_generic, - .read = tracing_saved_cmdlines_read, - .llseek = generic_file_llseek, + .open = tracing_open_generic, + .read = tracing_saved_cmdlines_read, + .llseek = generic_file_llseek, +}; + +static ssize_t +tracing_saved_tgids_read(struct file *file, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char *file_buf; + char *buf; + int len = 0; + int pid; + int i; + + file_buf = kmalloc(SAVED_CMDLINES*(16+1+16), GFP_KERNEL); + if (!file_buf) + return -ENOMEM; + + buf = file_buf; + + for (i = 0; i < SAVED_CMDLINES; i++) { + int tgid; + int r; + + pid = map_cmdline_to_pid[i]; + if (pid == -1 || pid == NO_CMDLINE_MAP) + continue; + + tgid = trace_find_tgid(pid); + r = sprintf(buf, "%d %d\n", pid, tgid); + buf += r; + len += r; + } + + len = simple_read_from_buffer(ubuf, cnt, ppos, + file_buf, len); + + kfree(file_buf); + + return len; +} + +static const struct file_operations tracing_saved_tgids_fops = { + .open = tracing_open_generic, + .read = tracing_saved_tgids_read, + .llseek = generic_file_llseek, }; static ssize_t @@ -4785,6 +4876,9 @@ static __init int tracer_init_debugfs(void) trace_create_file("saved_cmdlines", 0444, d_tracer, NULL, &tracing_saved_cmdlines_fops); + trace_create_file("saved_tgids", 0444, d_tracer, + NULL, &tracing_saved_tgids_fops); + trace_create_file("trace_clock", 0644, d_tracer, NULL, &trace_clock_fops); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 7360674ea04..9f2abe3924f 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -460,6 +460,7 @@ static inline void __trace_stack(struct trace_array *tr, unsigned long flags, extern cycle_t ftrace_now(int cpu); extern void trace_find_cmdline(int pid, char comm[]); +extern int trace_find_tgid(int pid); #ifdef CONFIG_DYNAMIC_FTRACE extern unsigned long ftrace_update_tot_cnt; @@ -671,6 +672,7 @@ enum trace_iterator_flags { TRACE_ITER_OVERWRITE = 0x200000, TRACE_ITER_STOP_ON_FREE = 0x400000, TRACE_ITER_IRQ_INFO = 0x800000, + TRACE_ITER_TGID = 0x1000000, }; /* diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 
a7d2a4c653d..f7f1f37a4c5 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -46,6 +46,8 @@ struct fgraph_data { #define TRACE_GRAPH_PRINT_DURATION 0x10 #define TRACE_GRAPH_PRINT_ABS_TIME 0x20 #define TRACE_GRAPH_PRINT_IRQS 0x40 +#define TRACE_GRAPH_PRINT_FLAT 0x80 + static struct tracer_opt trace_opts[] = { /* Display overruns? (for self-debug purpose) */ @@ -62,6 +64,8 @@ static struct tracer_opt trace_opts[] = { { TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) }, /* Display interrupts */ { TRACER_OPT(funcgraph-irqs, TRACE_GRAPH_PRINT_IRQS) }, + /* Use standard trace formatting rather than hierarchical */ + { TRACER_OPT(funcgraph-flat, TRACE_GRAPH_PRINT_FLAT) }, { } /* Empty entry */ }; @@ -1222,6 +1226,9 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags) int cpu = iter->cpu; int ret; + if (flags & TRACE_GRAPH_PRINT_FLAT) + return TRACE_TYPE_UNHANDLED; + if (data && per_cpu_ptr(data->cpu_data, cpu)->ignore) { per_cpu_ptr(data->cpu_data, cpu)->ignore = 0; return TRACE_TYPE_HANDLED; @@ -1279,13 +1286,6 @@ print_graph_function(struct trace_iterator *iter) return print_graph_function_flags(iter, tracer_flags.val); } -static enum print_line_t -print_graph_function_event(struct trace_iterator *iter, int flags, - struct trace_event *event) -{ - return print_graph_function(iter); -} - static void print_lat_header(struct seq_file *s, u32 flags) { static const char spaces[] = " " /* 16 spaces */ @@ -1352,6 +1352,11 @@ void print_graph_headers_flags(struct seq_file *s, u32 flags) { struct trace_iterator *iter = s->private; + if (flags & TRACE_GRAPH_PRINT_FLAT) { + trace_default_header(s); + return; + } + if (!(trace_flags & TRACE_ITER_CONTEXT_INFO)) return; @@ -1422,20 +1427,6 @@ static int func_graph_set_flag(u32 old_flags, u32 bit, int set) return 0; } -static struct trace_event_functions graph_functions = { - .trace = print_graph_function_event, -}; - -static struct trace_event graph_trace_entry_event = { - .type = TRACE_GRAPH_ENT, - .funcs = &graph_functions, -}; - -static struct trace_event graph_trace_ret_event = { - .type = TRACE_GRAPH_RET, - .funcs = &graph_functions -}; - static struct tracer graph_trace __read_mostly = { .name = "function_graph", .open = graph_trace_open, @@ -1458,16 +1449,6 @@ static __init int init_graph_trace(void) { max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1); - if (!register_ftrace_event(&graph_trace_entry_event)) { - pr_warning("Warning: could not register graph trace events\n"); - return 1; - } - - if (!register_ftrace_event(&graph_trace_ret_event)) { - pr_warning("Warning: could not register graph trace events\n"); - return 1; - } - return register_tracer(&graph_trace); } diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index df611a0e76c..cb29ce200e7 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -630,11 +630,25 @@ int trace_print_context(struct trace_iterator *iter) unsigned long secs = (unsigned long)t; char comm[TASK_COMM_LEN]; int ret; + int tgid; trace_find_cmdline(entry->pid, comm); - ret = trace_seq_printf(s, "%16s-%-5d [%03d] ", - comm, entry->pid, iter->cpu); + ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid); + if (!ret) + return 0; + + if (trace_flags & TRACE_ITER_TGID) { + tgid = trace_find_tgid(entry->pid); + if (tgid < 0) + ret = trace_seq_puts(s, "(-----) "); + else + ret = trace_seq_printf(s, "(%5d) ", tgid); + if (!ret) + return 0; + } + + ret = trace_seq_printf(s, "[%03d] ", iter->cpu); if (!ret) 
return 0; @@ -966,6 +980,168 @@ static struct trace_event trace_fn_event = { .funcs = &trace_fn_funcs, }; +/* TRACE_GRAPH_ENT */ +static enum print_line_t trace_graph_ent_trace(struct trace_iterator *iter, int flags, + struct trace_event *event) +{ + struct trace_seq *s = &iter->seq; + struct ftrace_graph_ent_entry *field; + + trace_assign_type(field, iter->ent); + + if (!trace_seq_puts(s, "graph_ent: func=")) + return TRACE_TYPE_PARTIAL_LINE; + + if (!seq_print_ip_sym(s, field->graph_ent.func, flags)) + return TRACE_TYPE_PARTIAL_LINE; + + if (!trace_seq_puts(s, "\n")) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + +static enum print_line_t trace_graph_ent_raw(struct trace_iterator *iter, int flags, + struct trace_event *event) +{ + struct ftrace_graph_ent_entry *field; + + trace_assign_type(field, iter->ent); + + if (!trace_seq_printf(&iter->seq, "%lx %d\n", + field->graph_ent.func, + field->graph_ent.depth)) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + +static enum print_line_t trace_graph_ent_hex(struct trace_iterator *iter, int flags, + struct trace_event *event) +{ + struct ftrace_graph_ent_entry *field; + struct trace_seq *s = &iter->seq; + + trace_assign_type(field, iter->ent); + + SEQ_PUT_HEX_FIELD_RET(s, field->graph_ent.func); + SEQ_PUT_HEX_FIELD_RET(s, field->graph_ent.depth); + + return TRACE_TYPE_HANDLED; +} + +static enum print_line_t trace_graph_ent_bin(struct trace_iterator *iter, int flags, + struct trace_event *event) +{ + struct ftrace_graph_ent_entry *field; + struct trace_seq *s = &iter->seq; + + trace_assign_type(field, iter->ent); + + SEQ_PUT_FIELD_RET(s, field->graph_ent.func); + SEQ_PUT_FIELD_RET(s, field->graph_ent.depth); + + return TRACE_TYPE_HANDLED; +} + +static struct trace_event_functions trace_graph_ent_funcs = { + .trace = trace_graph_ent_trace, + .raw = trace_graph_ent_raw, + .hex = trace_graph_ent_hex, + .binary = trace_graph_ent_bin, +}; + +static struct trace_event trace_graph_ent_event = { + .type = TRACE_GRAPH_ENT, + .funcs = &trace_graph_ent_funcs, +}; + +/* TRACE_GRAPH_RET */ +static enum print_line_t trace_graph_ret_trace(struct trace_iterator *iter, int flags, + struct trace_event *event) +{ + struct trace_seq *s = &iter->seq; + struct trace_entry *entry = iter->ent; + struct ftrace_graph_ret_entry *field; + + trace_assign_type(field, entry); + + if (!trace_seq_puts(s, "graph_ret: func=")) + return TRACE_TYPE_PARTIAL_LINE; + + if (!seq_print_ip_sym(s, field->ret.func, flags)) + return TRACE_TYPE_PARTIAL_LINE; + + if (!trace_seq_puts(s, "\n")) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + +static enum print_line_t trace_graph_ret_raw(struct trace_iterator *iter, int flags, + struct trace_event *event) +{ + struct ftrace_graph_ret_entry *field; + + trace_assign_type(field, iter->ent); + + if (!trace_seq_printf(&iter->seq, "%lx %lld %lld %ld %d\n", + field->ret.func, + field->ret.calltime, + field->ret.rettime, + field->ret.overrun, + field->ret.depth)) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + +static enum print_line_t trace_graph_ret_hex(struct trace_iterator *iter, int flags, + struct trace_event *event) +{ + struct ftrace_graph_ret_entry *field; + struct trace_seq *s = &iter->seq; + + trace_assign_type(field, iter->ent); + + SEQ_PUT_HEX_FIELD_RET(s, field->ret.func); + SEQ_PUT_HEX_FIELD_RET(s, field->ret.calltime); + SEQ_PUT_HEX_FIELD_RET(s, field->ret.rettime); + SEQ_PUT_HEX_FIELD_RET(s, field->ret.overrun); + SEQ_PUT_HEX_FIELD_RET(s,
field->ret.depth); + + return TRACE_TYPE_HANDLED; +} + +static enum print_line_t trace_graph_ret_bin(struct trace_iterator *iter, int flags, + struct trace_event *event) +{ + struct ftrace_graph_ret_entry *field; + struct trace_seq *s = &iter->seq; + + trace_assign_type(field, iter->ent); + + SEQ_PUT_FIELD_RET(s, field->ret.func); + SEQ_PUT_FIELD_RET(s, field->ret.calltime); + SEQ_PUT_FIELD_RET(s, field->ret.rettime); + SEQ_PUT_FIELD_RET(s, field->ret.overrun); + SEQ_PUT_FIELD_RET(s, field->ret.depth); + + return TRACE_TYPE_HANDLED; +} + +static struct trace_event_functions trace_graph_ret_funcs = { + .trace = trace_graph_ret_trace, + .raw = trace_graph_ret_raw, + .hex = trace_graph_ret_hex, + .binary = trace_graph_ret_bin, +}; + +static struct trace_event trace_graph_ret_event = { + .type = TRACE_GRAPH_RET, + .funcs = &trace_graph_ret_funcs, +}; + /* TRACE_CTX an TRACE_WAKE */ static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter, char *delim) @@ -1298,6 +1474,8 @@ static struct trace_event trace_print_event = { static struct trace_event *events[] __initdata = { &trace_fn_event, + &trace_graph_ent_event, + &trace_graph_ret_event, &trace_ctx_event, &trace_wake_event, &trace_stack_event, diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 06628358bee..a72b8d1a4b7 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -39,6 +39,8 @@ static DEFINE_PER_CPU(bool, hard_watchdog_warn); static DEFINE_PER_CPU(bool, watchdog_nmi_touch); static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts); static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); +#endif +#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); #endif @@ -174,7 +176,7 @@ void touch_softlockup_watchdog_sync(void) __raw_get_cpu_var(watchdog_touch_ts) = 0; } -#ifdef CONFIG_HARDLOCKUP_DETECTOR +#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI /* watchdog detector functions */ static int is_hardlockup(void) { @@ -188,6 +190,61 @@ static int is_hardlockup(void) } #endif +#ifdef CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU +static int is_hardlockup_other_cpu(int cpu) +{ + unsigned long hrint = per_cpu(hrtimer_interrupts, cpu); + + if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint) + return 1; + + per_cpu(hrtimer_interrupts_saved, cpu) = hrint; + return 0; +} + +static void watchdog_check_hardlockup_other_cpu(void) +{ + int cpu; + + /* + * Test for hardlockups every 3 samples. The sample period is + * watchdog_thresh * 2 / 5, so 3 samples gets us back to slightly over + * watchdog_thresh (over by 20%). 
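+ * For example, with the default watchdog_thresh of 10 seconds the sample + * period is 10 * 2 / 5 = 4 seconds, so the cross-cpu hardlockup check + * runs once every 3 * 4 = 12 seconds.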
+ */ + if (__this_cpu_read(hrtimer_interrupts) % 3 != 0) + return; + + /* check for a hardlockup on the next cpu */ + cpu = cpumask_next(smp_processor_id(), cpu_online_mask); + if (cpu >= nr_cpu_ids) + cpu = cpumask_first(cpu_online_mask); + if (cpu == smp_processor_id()) + return; + + if (per_cpu(watchdog_nmi_touch, cpu) == true) { + per_cpu(watchdog_nmi_touch, cpu) = false; + return; + } + + if (is_hardlockup_other_cpu(cpu)) { + /* only warn once */ + if (per_cpu(hard_watchdog_warn, cpu) == true) + return; + + if (hardlockup_panic) + panic("Watchdog detected hard LOCKUP on cpu %d", cpu); + else + WARN(1, "Watchdog detected hard LOCKUP on cpu %d", cpu); + + per_cpu(hard_watchdog_warn, cpu) = true; + } else { + per_cpu(hard_watchdog_warn, cpu) = false; + } +} +#else +static inline void watchdog_check_hardlockup_other_cpu(void) { return; } +#endif + static int is_softlockup(unsigned long touch_ts) { unsigned long now = get_timestamp(smp_processor_id()); @@ -199,7 +256,7 @@ static int is_softlockup(unsigned long touch_ts) return 0; } -#ifdef CONFIG_HARDLOCKUP_DETECTOR +#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI static struct perf_event_attr wd_hw_attr = { .type = PERF_TYPE_HARDWARE, @@ -247,6 +304,9 @@ static void watchdog_overflow_callback(struct perf_event *event, __this_cpu_write(hard_watchdog_warn, false); return; } +#endif + +#ifdef CONFIG_HARDLOCKUP_DETECTOR static void watchdog_interrupt_count(void) { __this_cpu_inc(hrtimer_interrupts); @@ -265,6 +325,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) /* kick the hardlockup detector */ watchdog_interrupt_count(); + /* test for hardlockups on the next cpu */ + watchdog_check_hardlockup_other_cpu(); + /* kick the softlockup detector */ wake_up_process(__this_cpu_read(softlockup_watchdog)); @@ -359,7 +422,7 @@ static int watchdog(void *unused) } -#ifdef CONFIG_HARDLOCKUP_DETECTOR +#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI static int watchdog_nmi_enable(int cpu) { struct perf_event_attr *wd_attr; @@ -430,6 +493,18 @@ static void watchdog_prepare_cpu(int cpu) WARN_ON(per_cpu(softlockup_watchdog, cpu)); hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hrtimer->function = watchdog_timer_fn; + +#ifdef CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU + /* + * The new cpu will be marked online before the first hrtimer interrupt + * runs on it. If another cpu tests for a hardlockup on the new cpu + * before it has run its first hrtimer, it will get a false positive. + * Touch the watchdog on the new cpu to delay the first check for at + * least 3 sampling periods to guarantee one hrtimer has run on the new + * cpu. + */ + per_cpu(watchdog_nmi_touch, cpu) = true; +#endif } static int watchdog_enable(int cpu) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 0e157135173..b3b44c11bc9 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -178,14 +178,24 @@ config LOCKUP_DETECTOR The overhead should be minimal. A periodic hrtimer runs to generate interrupts and kick the watchdog task every 4 seconds. An NMI is generated every 10 seconds or so to check for hardlockups. + If NMIs are not available on the platform, every 12 seconds the + hrtimer interrupt on one cpu will be used to check for hardlockups + on the next cpu. The frequency of hrtimer and NMI events and the soft and hard lockup thresholds can be controlled through the sysctl watchdog_thresh. 
-config HARDLOCKUP_DETECTOR +config HARDLOCKUP_DETECTOR_NMI def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI && \ !HAVE_NMI_WATCHDOG +config HARDLOCKUP_DETECTOR_OTHER_CPU + def_bool LOCKUP_DETECTOR && SMP && !HARDLOCKUP_DETECTOR_NMI && \ + !HAVE_NMI_WATCHDOG + +config HARDLOCKUP_DETECTOR + def_bool HARDLOCKUP_DETECTOR_NMI || HARDLOCKUP_DETECTOR_OTHER_CPU + config BOOTPARAM_HARDLOCKUP_PANIC bool "Panic (Reboot) On Hard Lockups" depends on LOCKUP_DETECTOR diff --git a/lib/lzo/lzo1x_decompress_safe.c b/lib/lzo/lzo1x_decompress_safe.c index a1c387f6afb..0875a8e6eda 100644 --- a/lib/lzo/lzo1x_decompress_safe.c +++ b/lib/lzo/lzo1x_decompress_safe.c @@ -89,9 +89,11 @@ copy_literal_run: COPY8(op, ip); op += 8; ip += 8; +# if !defined(__arm__) COPY8(op, ip); op += 8; ip += 8; +# endif } while (ip < ie); ip = ie; op = oe; @@ -190,9 +192,11 @@ copy_literal_run: COPY8(op, m_pos); op += 8; m_pos += 8; +# if !defined(__arm__) COPY8(op, m_pos); op += 8; m_pos += 8; +# endif } while (op < oe); op = oe; if (HAVE_IP(6)) { diff --git a/lib/lzo/lzodefs.h b/lib/lzo/lzodefs.h index 6710b83ce72..db756ccb275 100644 --- a/lib/lzo/lzodefs.h +++ b/lib/lzo/lzodefs.h @@ -12,8 +12,14 @@ */ +#if 1 && defined(__arm__) && ((__LINUX_ARM_ARCH__ >= 6) || defined(__ARM_FEATURE_UNALIGNED)) +#define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 1 +#define COPY4(dst, src) \ + * (u32 *) (void *) (dst) = * (const u32 *) (const void *) (src) +#else #define COPY4(dst, src) \ put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst)) +#endif #if defined(__x86_64__) #define COPY8(dst, src) \ put_unaligned(get_unaligned((const u64 *)(src)), (u64 *)(dst)) diff --git a/mm/Makefile b/mm/Makefile index 779ba82a7af..eac249bc1f4 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -46,7 +46,7 @@ obj-$(CONFIG_FS_XIP) += filemap_xip.o obj-$(CONFIG_MIGRATION) += migrate.o obj-$(CONFIG_QUICKLIST) += quicklist.o obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o -obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o +obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o vmpressure.o obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 599f5f33344..e7f9a4e48a7 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -228,6 +229,9 @@ struct mem_cgroup { */ struct res_counter res; + /* vmpressure notifications */ + struct vmpressure vmpressure; + union { /* * the counter to account for mem+swap usage. @@ -394,6 +398,25 @@ enum charge_type { static void mem_cgroup_get(struct mem_cgroup *memcg); static void mem_cgroup_put(struct mem_cgroup *memcg); +/* Some nice accessors for the vmpressure. 
*/ +struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg) +{ + if (!memcg) + memcg = root_mem_cgroup; + return &memcg->vmpressure; +} + +struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr) +{ + return &container_of(vmpr, struct mem_cgroup, vmpressure)->css; +} + +struct vmpressure *css_to_vmpressure(struct cgroup_subsys_state *css) +{ + struct mem_cgroup *memcg = container_of(css, struct mem_cgroup, css); + return &memcg->vmpressure; +} + /* Writing them here to avoid exposing memcg's inner layout */ #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM #include @@ -4527,16 +4550,17 @@ static void mem_cgroup_usage_unregister_event(struct cgroup *cgrp, swap_buffers: /* Swap primary and spare array */ thresholds->spare = thresholds->primary; - /* If all events are unregistered, free the spare array */ - if (!new) { - kfree(thresholds->spare); - thresholds->spare = NULL; - } rcu_assign_pointer(thresholds->primary, new); /* To be sure that nobody uses thresholds */ synchronize_rcu(); + + /* If all events are unregistered, free the spare array */ + if (!new) { + kfree(thresholds->spare); + thresholds->spare = NULL; + } unlock: mutex_unlock(&memcg->thresholds_lock); } @@ -4734,6 +4758,11 @@ static struct cftype mem_cgroup_files[] = { .unregister_event = mem_cgroup_oom_unregister_event, .private = MEMFILE_PRIVATE(_OOM_TYPE, OOM_CONTROL), }, + { + .name = "pressure_level", + .register_event = vmpressure_register_event, + .unregister_event = vmpressure_unregister_event, + }, #ifdef CONFIG_NUMA { .name = "numa_stat", @@ -5036,6 +5065,7 @@ mem_cgroup_create(struct cgroup *cont) memcg->move_charge_at_immigrate = 0; mutex_init(&memcg->thresholds_lock); spin_lock_init(&memcg->move_lock); + vmpressure_init(&memcg->vmpressure); return &memcg->css; free_out: __mem_cgroup_free(memcg); diff --git a/mm/mlock.c b/mm/mlock.c index 39a36dbfbe5..2747d81d6e0 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -23,10 +23,10 @@ int can_do_mlock(void) { - if (capable(CAP_IPC_LOCK)) - return 1; if (rlimit(RLIMIT_MEMLOCK) != 0) return 1; + if (capable(CAP_IPC_LOCK)) + return 1; return 0; } EXPORT_SYMBOL(can_do_mlock); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6a6ddd7ebc6..035d6aeb9d0 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5389,11 +5389,10 @@ static void __setup_per_zone_wmarks(void) zone->watermark[WMARK_MIN] = min; } - zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + - low + (min >> 2); - zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + - low + (min >> 1); - + zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + + low + (min >> 2); + zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + + low + (min >> 1); setup_zone_migrate_reserve(zone); spin_unlock_irqrestore(&zone->lock, flags); } diff --git a/mm/vmpressure.c b/mm/vmpressure.c new file mode 100644 index 00000000000..c8f48297aa7 --- /dev/null +++ b/mm/vmpressure.c @@ -0,0 +1,375 @@ +/* + * Linux VM pressure + * + * Copyright 2012 Linaro Ltd. + * Anton Vorontsov + * + * Based on ideas from Andrew Morton, David Rientjes, KOSAKI Motohiro, + * Leonid Moiseichuk, Mel Gorman, Minchan Kim and Pekka Enberg. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * The window size (vmpressure_win) is the number of scanned pages before + * we try to analyze the scanned/reclaimed ratio. So the window is used as + * a rate-limit tunable for the "low" level notification, and also for + * averaging the ratio for the medium/critical levels. Using a small window + * size can cause a lot of false positives, but a window size that is too + * big will delay the notifications. + * + * As the vmscan reclaimer logic works with chunks which are multiple of + * SWAP_CLUSTER_MAX, it makes sense to use it for the window size as well. + * + * TODO: Make the window size depend on machine size, as we do for vmstat + * thresholds. Currently we set it to 512 pages (2MB for 4KB pages). + */ +static const unsigned long vmpressure_win = SWAP_CLUSTER_MAX * 16; + +/* + * These thresholds are used when we account memory pressure through + * scanned/reclaimed ratio. The current values were chosen empirically. In + * essence, they are percentages: the higher the value, the more + * unsuccessful the reclaims were. + */ +static const unsigned int vmpressure_level_med = 60; +static const unsigned int vmpressure_level_critical = 95; + +/* + * When there are too few pages left to scan, vmpressure() may miss the + * critical pressure as the number of scanned pages will be less than the + * "window size". However, in that case the vmscan priority will rise + * quickly as the reclaimer will try to scan LRUs more deeply. + * + * The vmscan logic considers these special priorities: + * + * prio == DEF_PRIORITY (12): reclaimer starts with that value + * prio <= DEF_PRIORITY - 2 : kswapd becomes somewhat overwhelmed + * prio == 0 : close to OOM, kernel scans every page in an lru + * + * Any value in this range is acceptable for this tunable (i.e. from 12 to + * 0). The current value of vmpressure_level_critical_prio was chosen + * empirically, but the number, in essence, means that we consider the + * level critical when the scanning depth is ~10% of the lru size (vmscan + * scans 'lru_size >> prio' pages, so it is actually 12.5%, or one + * eighth).
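+ * + * Worked example (illustrative arithmetic only, nothing new here): + * ilog2(100 / 10) == ilog2(10) == 3, so with DEF_PRIORITY == 12 the + * reclaimer must drop nine priority levels before prio <= 3 holds and + * the critical level is reported; at prio == 3, vmscan scans + * 'lru_size >> 3' pages, i.e. one eighth of the lru per pass.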
+ */ +static const unsigned int vmpressure_level_critical_prio = ilog2(100 / 10); + +static struct vmpressure *work_to_vmpressure(struct work_struct *work) +{ + return container_of(work, struct vmpressure, work); +} + +static struct vmpressure *cg_to_vmpressure(struct cgroup *cg) +{ + return css_to_vmpressure(cgroup_subsys_state(cg, mem_cgroup_subsys_id)); +} + +static struct vmpressure *vmpressure_parent(struct vmpressure *vmpr) +{ + struct cgroup *cg = vmpressure_to_css(vmpr)->cgroup; + struct mem_cgroup *memcg = mem_cgroup_from_cont(cg); + + memcg = parent_mem_cgroup(memcg); + if (!memcg) + return NULL; + return memcg_to_vmpressure(memcg); +} + +enum vmpressure_levels { + VMPRESSURE_LOW = 0, + VMPRESSURE_MEDIUM, + VMPRESSURE_CRITICAL, + VMPRESSURE_NUM_LEVELS, +}; + +static const char * const vmpressure_str_levels[] = { + [VMPRESSURE_LOW] = "low", + [VMPRESSURE_MEDIUM] = "medium", + [VMPRESSURE_CRITICAL] = "critical", +}; + +static enum vmpressure_levels vmpressure_level(unsigned long pressure) +{ + if (pressure >= vmpressure_level_critical) + return VMPRESSURE_CRITICAL; + else if (pressure >= vmpressure_level_med) + return VMPRESSURE_MEDIUM; + return VMPRESSURE_LOW; +} + +static enum vmpressure_levels vmpressure_calc_level(unsigned long scanned, + unsigned long reclaimed) +{ + unsigned long scale = scanned + reclaimed; + unsigned long pressure; + + /* + * We calculate the ratio (in percent) of how many pages were + * scanned vs. reclaimed in a given time frame (window). Note that + * time is in VM reclaimer's "ticks", i.e. number of pages + * scanned. This makes it possible to set the desired reaction + * time and serves as a ratelimit. + */ + pressure = scale - (reclaimed * scale / scanned); + pressure = pressure * 100 / scale; + + pr_debug("%s: %3lu (s: %lu r: %lu)\n", __func__, pressure, + scanned, reclaimed); + + return vmpressure_level(pressure); +} + +struct vmpressure_event { + struct eventfd_ctx *efd; + enum vmpressure_levels level; + struct list_head node; +}; + +static bool vmpressure_event(struct vmpressure *vmpr, + unsigned long scanned, unsigned long reclaimed) +{ + struct vmpressure_event *ev; + enum vmpressure_levels level; + bool signalled = false; + + level = vmpressure_calc_level(scanned, reclaimed); + + mutex_lock(&vmpr->events_lock); + + list_for_each_entry(ev, &vmpr->events, node) { + if (level >= ev->level) { + eventfd_signal(ev->efd, 1); + signalled = true; + } + } + + mutex_unlock(&vmpr->events_lock); + + return signalled; +} + +static void vmpressure_work_fn(struct work_struct *work) +{ + struct vmpressure *vmpr = work_to_vmpressure(work); + unsigned long scanned; + unsigned long reclaimed; + + /* + * Several contexts might be calling vmpressure(), so it is + * possible that the work was rescheduled again before the old + * work context cleared the counters. In that case we will run + * just after the old work returns, but then scanned might be zero + * here. No need for any locks here since we don't care if + * vmpr->reclaimed is in sync. + */ + if (!vmpr->scanned) + return; + + mutex_lock(&vmpr->sr_lock); + scanned = vmpr->scanned; + reclaimed = vmpr->reclaimed; + vmpr->scanned = 0; + vmpr->reclaimed = 0; + mutex_unlock(&vmpr->sr_lock); + + do { + if (vmpressure_event(vmpr, scanned, reclaimed)) + break; + /* + * If not handled, propagate the event upward into the + * hierarchy.
+ */ + } while ((vmpr = vmpressure_parent(vmpr))); +} + +/** + * vmpressure() - Account memory pressure through scanned/reclaimed ratio + * @gfp: reclaimer's gfp mask + * @memcg: cgroup memory controller handle + * @scanned: number of pages scanned + * @reclaimed: number of pages reclaimed + * + * This function should be called from the vmscan reclaim path to account + * "instantaneous" memory pressure (scanned/reclaimed ratio). The raw + * pressure index is then further refined and averaged over time. + * + * This function does not return any value. + */ +void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, + unsigned long scanned, unsigned long reclaimed) +{ + struct vmpressure *vmpr = memcg_to_vmpressure(memcg); + + /* + * Here we only want to account pressure that userland is able to + * help us with. For example, suppose that the DMA zone is under + * pressure; if we notify userland about that kind of pressure, + * then it will be mostly a waste as it will trigger unnecessary + * freeing of memory by userland (since userland is more likely to + * have HIGHMEM/MOVABLE pages instead of the DMA fallback). That + * is why we include only movable, highmem and FS/IO pages. + * Indirect reclaim (kswapd) sets sc->gfp_mask to GFP_KERNEL, so + * we account it too. + */ + if (!(gfp & (__GFP_HIGHMEM | __GFP_MOVABLE | __GFP_IO | __GFP_FS))) + return; + + /* + * If we got here with no pages scanned, then that is an indicator + * that the reclaimer was unable to find any shrinkable LRUs at the + * current scanning depth. But it does not mean that we should + * report the critical pressure, yet. If the scanning priority + * (scanning depth) goes too high (deep), we will be notified + * through vmpressure_prio(). But so far, keep calm. + */ + if (!scanned) + return; + + mutex_lock(&vmpr->sr_lock); + vmpr->scanned += scanned; + vmpr->reclaimed += reclaimed; + scanned = vmpr->scanned; + mutex_unlock(&vmpr->sr_lock); + + if (scanned < vmpressure_win || work_pending(&vmpr->work)) + return; + schedule_work(&vmpr->work); +} + +/** + * vmpressure_prio() - Account memory pressure through reclaimer priority level + * @gfp: reclaimer's gfp mask + * @memcg: cgroup memory controller handle + * @prio: reclaimer's priority + * + * This function should be called from the reclaim path every time the + * vmscan reclaiming priority (scanning depth) changes. + * + * This function does not return any value. + */ +void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio) +{ + /* + * We only use prio for accounting the critical level. For more + * info see the comment for the vmpressure_level_critical_prio + * variable above. + */ + if (prio > vmpressure_level_critical_prio) + return; + + /* + * OK, the prio is below the threshold; update the vmpressure + * information before the reclaimer dives into a long reclaim + * cycle. Passing scanned = vmpressure_win, reclaimed = 0 to + * vmpressure() basically means that we signal the 'critical' + * level. + */ + vmpressure(gfp, memcg, vmpressure_win, 0); +} + +/** + * vmpressure_register_event() - Bind vmpressure notifications to an eventfd + * @cg: cgroup that is interested in vmpressure notifications + * @cft: cgroup control files handle + * @eventfd: eventfd context to link notifications with + * @args: event arguments (used to set up a pressure level threshold) + * + * This function associates an eventfd context with the vmpressure + * infrastructure, so that the notifications will be delivered to the + * @eventfd.
The @args parameter is a string that denotes the pressure level + * threshold (one of vmpressure_str_levels, i.e. "low", "medium", or + * "critical"). + * + * This function should not be used directly; just pass it to (struct + * cftype).register_event, and then cgroup core will handle everything by + * itself. + */ +int vmpressure_register_event(struct cgroup *cg, struct cftype *cft, + struct eventfd_ctx *eventfd, const char *args) +{ + struct vmpressure *vmpr = cg_to_vmpressure(cg); + struct vmpressure_event *ev; + int level; + + for (level = 0; level < VMPRESSURE_NUM_LEVELS; level++) { + if (!strcmp(vmpressure_str_levels[level], args)) + break; + } + + if (level >= VMPRESSURE_NUM_LEVELS) + return -EINVAL; + + ev = kzalloc(sizeof(*ev), GFP_KERNEL); + if (!ev) + return -ENOMEM; + + ev->efd = eventfd; + ev->level = level; + + mutex_lock(&vmpr->events_lock); + list_add(&ev->node, &vmpr->events); + mutex_unlock(&vmpr->events_lock); + + return 0; +} + +/** + * vmpressure_unregister_event() - Unbind eventfd from vmpressure + * @cg: cgroup handle + * @cft: cgroup control files handle + * @eventfd: eventfd context that was used to link vmpressure with the @cg + * + * This function does internal manipulations to detach the @eventfd from + * the vmpressure notifications, and then frees internal resources + * associated with the @eventfd (but the @eventfd itself is not freed). + * + * This function should not be used directly; just pass it to (struct + * cftype).unregister_event, and then cgroup core will handle everything + * by itself. + */ +void vmpressure_unregister_event(struct cgroup *cg, struct cftype *cft, + struct eventfd_ctx *eventfd) +{ + struct vmpressure *vmpr = cg_to_vmpressure(cg); + struct vmpressure_event *ev; + + mutex_lock(&vmpr->events_lock); + list_for_each_entry(ev, &vmpr->events, node) { + if (ev->efd != eventfd) + continue; + list_del(&ev->node); + kfree(ev); + break; + } + mutex_unlock(&vmpr->events_lock); +} + +/** + * vmpressure_init() - Initialize vmpressure control structure + * @vmpr: Structure to be initialized + * + * This function should be called on every allocated vmpressure structure + * before any usage.
+ */ +void vmpressure_init(struct vmpressure *vmpr) +{ + mutex_init(&vmpr->sr_lock); + mutex_init(&vmpr->events_lock); + INIT_LIST_HEAD(&vmpr->events); + INIT_WORK(&vmpr->work, vmpressure_work_fn); +} diff --git a/mm/vmscan.c b/mm/vmscan.c index 639ed2a6841..74fc9f031af 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -42,6 +43,7 @@ #include #include #include +#include #include #include @@ -222,6 +224,38 @@ static unsigned long zone_nr_lru_pages(struct mem_cgroup_zone *mz, return zone_page_state(mz->zone, NR_LRU_BASE + lru); } +struct dentry *debug_file; + +static int debug_shrinker_show(struct seq_file *s, void *unused) +{ + struct shrinker *shrinker; + struct shrink_control sc; + + sc.gfp_mask = -1; + sc.nr_to_scan = 0; + + down_read(&shrinker_rwsem); + list_for_each_entry(shrinker, &shrinker_list, list) { + int num_objs; + + num_objs = shrinker->shrink(shrinker, &sc); + seq_printf(s, "%pf %d\n", shrinker->shrink, num_objs); + } + up_read(&shrinker_rwsem); + return 0; +} + +static int debug_shrinker_open(struct inode *inode, struct file *file) +{ + return single_open(file, debug_shrinker_show, inode->i_private); +} + +static const struct file_operations debug_shrinker_fops = { + .open = debug_shrinker_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; /* * Add a shrinker callback to be called from the vm @@ -235,6 +269,15 @@ void register_shrinker(struct shrinker *shrinker) } EXPORT_SYMBOL(register_shrinker); +static int __init add_shrinker_debug(void) +{ + debugfs_create_file("shrinker", 0644, NULL, NULL, + &debug_shrinker_fops); + return 0; +} + +late_initcall(add_shrinker_debug); + /* * Remove one */ @@ -2036,6 +2079,7 @@ restart: static void shrink_zone(struct zone *zone, struct scan_control *sc) { + unsigned long nr_reclaimed, nr_scanned; struct mem_cgroup *root = sc->target_mem_cgroup; struct mem_cgroup_reclaim_cookie reclaim = { .zone = zone, @@ -2043,6 +2087,9 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc) }; struct mem_cgroup *memcg; + nr_reclaimed = sc->nr_reclaimed; + nr_scanned = sc->nr_scanned; + memcg = mem_cgroup_iter(root, NULL, &reclaim); do { struct mem_cgroup_zone mz = { @@ -2067,6 +2114,10 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc) } memcg = mem_cgroup_iter(root, memcg, &reclaim); } while (memcg); + + vmpressure(sc->gfp_mask, sc->target_mem_cgroup, + sc->nr_scanned - nr_scanned, + sc->nr_reclaimed - nr_reclaimed); } /* Returns true if compaction should go ahead for a high-order request */ diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 984f9f6bf5c..864fa47024d 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -40,10 +40,6 @@ #include -#ifdef CONFIG_ANDROID_PARANOID_NETWORK -#include -#endif - #ifndef CONFIG_BT_SOCK_DEBUG #undef BT_DBG #define BT_DBG(D...) 
@@ -134,15 +130,15 @@ int bt_sock_unregister(int proto) } EXPORT_SYMBOL(bt_sock_unregister); -#ifdef CONFIG_ANDROID_PARANOID_NETWORK +#ifdef CONFIG_PARANOID_NETWORK static inline int current_has_bt_admin(void) { - return (!current_euid() || in_egroup_p(AID_NET_BT_ADMIN)); + return !current_euid(); } static inline int current_has_bt(void) { - return (current_has_bt_admin() || in_egroup_p(AID_NET_BT)); + return current_has_bt_admin(); } # else static inline int current_has_bt_admin(void) diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index d08415c6d39..ed4637cbcb6 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -346,15 +346,19 @@ static int rfcomm_sock_create(struct net *net, struct socket *sock, static int rfcomm_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { - struct sockaddr_rc *sa = (struct sockaddr_rc *) addr; + struct sockaddr_rc sa; struct sock *sk = sock->sk; - int err = 0; - - BT_DBG("sk %p %s", sk, batostr(&sa->rc_bdaddr)); + int len, err = 0; if (!addr || addr->sa_family != AF_BLUETOOTH) return -EINVAL; + memset(&sa, 0, sizeof(sa)); + len = min_t(unsigned int, sizeof(sa), addr_len); + memcpy(&sa, addr, len); + + BT_DBG("sk %p %s", sk, batostr(&sa.rc_bdaddr)); + lock_sock(sk); if (sk->sk_state != BT_OPEN) { @@ -369,12 +373,12 @@ static int rfcomm_sock_bind(struct socket *sock, struct sockaddr *addr, int addr write_lock_bh(&rfcomm_sk_list.lock); - if (sa->rc_channel && __rfcomm_get_sock_by_addr(sa->rc_channel, &sa->rc_bdaddr)) { + if (sa.rc_channel && __rfcomm_get_sock_by_addr(sa.rc_channel, &sa.rc_bdaddr)) { err = -EADDRINUSE; } else { /* Save source address */ - bacpy(&bt_sk(sk)->src, &sa->rc_bdaddr); - rfcomm_pi(sk)->channel = sa->rc_channel; + bacpy(&bt_sk(sk)->src, &sa.rc_bdaddr); + rfcomm_pi(sk)->channel = sa.rc_channel; sk->sk_state = BT_BOUND; } diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index cce77b29dd0..167c785e0ca 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -333,6 +333,10 @@ static long caif_stream_data_wait(struct sock *sk, long timeo) release_sock(sk); timeo = schedule_timeout(timeo); lock_sock(sk); + + if (sock_flag(sk, SOCK_DEAD)) + break; + clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); } @@ -377,6 +381,10 @@ static int caif_stream_recvmsg(struct kiocb *iocb, struct socket *sock, struct sk_buff *skb; lock_sock(sk); + if (sock_flag(sk, SOCK_DEAD)) { + err = -ECONNRESET; + goto unlock; + } skb = skb_dequeue(&sk->sk_receive_queue); caif_check_flow_release(sk); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 22f94cb8890..a96ef56634c 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -253,9 +253,9 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, fl6.fl6_sport = inet_rsk(req)->loc_port; security_req_classify_flow(req, flowi6_to_flowi(&fl6)); - opt = np->opt; - - final_p = fl6_update_dst(&fl6, opt, &final); + rcu_read_lock(); + final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final); + rcu_read_unlock(); dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); if (IS_ERR(dst)) { @@ -272,13 +272,14 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, &ireq6->loc_addr, &ireq6->rmt_addr); fl6.daddr = ireq6->rmt_addr; - err = ip6_xmit(sk, skb, &fl6, opt, np->tclass); + rcu_read_lock(); + err = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt), + np->tclass); + rcu_read_unlock(); err = net_xmit_eval(err); } done: - if (opt != NULL && opt != np->opt) - sock_kfree_s(sk, opt, 
opt->tot_len); dst_release(dst); return err; } @@ -469,6 +470,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, { struct inet6_request_sock *ireq6 = inet6_rsk(req); struct ipv6_pinfo *newnp, *np = inet6_sk(sk); + struct ipv6_txoptions *opt; struct inet_sock *newinet; struct dccp6_sock *newdp6; struct sock *newsk; @@ -601,16 +603,16 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, * Yes, keeping reference count would be much more clever, but we make * one more one thing there: reattach optmem to newsk. */ - if (opt != NULL) { - newnp->opt = ipv6_dup_options(newsk, opt); - if (opt != np->opt) - sock_kfree_s(sk, opt, opt->tot_len); - } + opt = rcu_dereference(np->opt); + if (opt) { + opt = ipv6_dup_options(newsk, opt); + RCU_INIT_POINTER(newnp->opt, opt); + } inet_csk(newsk)->icsk_ext_hdr_len = 0; - if (newnp->opt != NULL) - inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + - newnp->opt->opt_flen); + if (opt) + inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen + + opt->opt_flen; dccp_sync_mss(newsk, dst_mtu(dst)); @@ -864,6 +866,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct ipv6_pinfo *np = inet6_sk(sk); struct dccp_sock *dp = dccp_sk(sk); struct in6_addr *saddr = NULL, *final_p, final; + struct ipv6_txoptions *opt; struct flowi6 fl6; struct dst_entry *dst; int addr_type; @@ -966,7 +969,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.fl6_sport = inet->inet_sport; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - final_p = fl6_update_dst(&fl6, np->opt, &final); + opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + final_p = fl6_update_dst(&fl6, opt, &final); dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); if (IS_ERR(dst)) { @@ -986,9 +990,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, __ip6_dst_store(sk, dst, NULL, NULL); icsk->icsk_ext_hdr_len = 0; - if (np->opt != NULL) - icsk->icsk_ext_hdr_len = (np->opt->opt_flen + - np->opt->opt_nflen); + if (opt) + icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen; inet->inet_dport = usin->sin6_port; diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index d183262943d..61940872973 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -415,6 +415,19 @@ config INET_UDP_DIAG Support for UDP socket monitoring interface used by the ss tool. If unsure, say Y. +config INET_DIAG_DESTROY + bool "INET: allow privileged process to administratively close sockets" + depends on INET_DIAG + default n + ---help--- + Provides a SOCK_DESTROY_BACKPORT operation that allows privileged processes + (e.g., a connection manager or a network administration tool such as + ss) to close sockets opened by other processes. Closing a socket in + this way interrupts any blocking read/write/connect operations on + the socket and causes future socket calls to behave as if the socket + had been disconnected. + If unsure, say N. 
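As a rough illustration of how a privileged process would drive this interface, the sketch below hand-rolls the netlink request (a hedged example, not part of the patch: it assumes the patched uapi headers export SOCK_DESTROY_BACKPORT and struct inet_diag_req_v2, and it skips reading back the netlink ACK):

    /* Hedged sketch: ask the kernel to close an established TCP socket
     * through the sock_diag channel enabled by this option. */
    #include <linux/inet_diag.h>
    #include <linux/netlink.h>
    #include <netinet/in.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <unistd.h>

    static int destroy_tcp_socket(const struct inet_diag_sockid *id)
    {
            struct {
                    struct nlmsghdr nlh;
                    struct inet_diag_req_v2 req;
            } msg;
            int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);

            if (fd < 0)
                    return -1;
            memset(&msg, 0, sizeof(msg));
            msg.nlh.nlmsg_len = sizeof(msg);
            msg.nlh.nlmsg_type = SOCK_DESTROY_BACKPORT;
            msg.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
            msg.req.sdiag_family = AF_INET;
            msg.req.sdiag_protocol = IPPROTO_TCP;
            msg.req.id = *id;       /* 4-tuple + cookie of the target */
            if (send(fd, &msg, sizeof(msg), 0) < 0) {
                    close(fd);
                    return -1;
            }
            close(fd);
            return 0;       /* a real caller would read the netlink ACK */
    }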
+ menuconfig TCP_CONG_ADVANCED bool "TCP: advanced congestion control" ---help--- diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 8cb3091c8f6..8299c7242a3 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -278,40 +278,65 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, return inet_csk_diag_fill(sk, skb, r, pid, seq, nlmsg_flags, unlh); } -int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb, - const struct nlmsghdr *nlh, struct inet_diag_req_v2 *req) +struct sock *inet_diag_find_one_icsk(struct net *net, + struct inet_hashinfo *hashinfo, + struct inet_diag_req_v2 *req) { - int err; struct sock *sk; - struct sk_buff *rep; - err = -EINVAL; if (req->sdiag_family == AF_INET) { - sk = inet_lookup(&init_net, hashinfo, req->id.idiag_dst[0], + sk = inet_lookup(net, hashinfo, req->id.idiag_dst[0], req->id.idiag_dport, req->id.idiag_src[0], req->id.idiag_sport, req->id.idiag_if); } #if IS_ENABLED(CONFIG_IPV6) else if (req->sdiag_family == AF_INET6) { - sk = inet6_lookup(&init_net, hashinfo, - (struct in6_addr *)req->id.idiag_dst, - req->id.idiag_dport, - (struct in6_addr *)req->id.idiag_src, - req->id.idiag_sport, - req->id.idiag_if); + if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) && + ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src)) + sk = inet_lookup(net, hashinfo, req->id.idiag_dst[3], + req->id.idiag_dport, req->id.idiag_src[3], + req->id.idiag_sport, req->id.idiag_if); + else + sk = inet6_lookup(net, hashinfo, + (struct in6_addr *)req->id.idiag_dst, + req->id.idiag_dport, + (struct in6_addr *)req->id.idiag_src, + req->id.idiag_sport, + req->id.idiag_if); } #endif else { - goto out_nosk; + return ERR_PTR(-EINVAL); } - err = -ENOENT; - if (sk == NULL) - goto out_nosk; + if (!sk) + return ERR_PTR(-ENOENT); - err = sock_diag_check_cookie(sk, req->id.idiag_cookie); - if (err) - goto out; + if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) { + if (sk->sk_state == TCP_TIME_WAIT) + inet_twsk_put((struct inet_timewait_sock *)sk); + else + sock_put(sk); + return ERR_PTR(-ENOENT); + } + + return sk; +} +EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk); + +int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, + struct sk_buff *in_skb, + const struct nlmsghdr *nlh, + struct inet_diag_req_v2 *req) +{ + struct net *net = sock_net(in_skb->sk); + struct sk_buff *rep; + struct sock *sk; + int err; + + sk = inet_diag_find_one_icsk(net, hashinfo, req); + if (IS_ERR(sk)) + return PTR_ERR(sk); err = -ENOMEM; rep = alloc_skb(NLMSG_SPACE((sizeof(struct inet_diag_msg) + @@ -341,12 +366,11 @@ out: else sock_put(sk); } -out_nosk: return err; } EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk); -static int inet_diag_get_exact(struct sk_buff *in_skb, +static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb, const struct nlmsghdr *nlh, struct inet_diag_req_v2 *req) { @@ -356,8 +380,12 @@ static int inet_diag_get_exact(struct sk_buff *in_skb, handler = inet_diag_lock_handler(req->sdiag_protocol); if (IS_ERR(handler)) err = PTR_ERR(handler); - else + else if (cmd == SOCK_DIAG_BY_FAMILY) err = handler->dump_one(in_skb, nlh, req); + else if (cmd == SOCK_DESTROY_BACKPORT && handler->destroy) + err = handler->destroy(in_skb, req); + else + err = -EOPNOTSUPP; inet_diag_unlock_handler(handler); return err; @@ -1049,7 +1077,7 @@ static int inet_diag_get_exact_compat(struct sk_buff *in_skb, req.idiag_states = rc->idiag_states; req.id = rc->id; - return inet_diag_get_exact(in_skb, nlh, &req); + return 
inet_diag_cmd_exact(SOCK_DIAG_BY_FAMILY, in_skb, nlh, &req); } static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) @@ -1082,14 +1110,15 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) return inet_diag_get_exact_compat(skb, nlh); } -static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) +static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h) { int hdrlen = sizeof(struct inet_diag_req_v2); if (nlmsg_len(h) < hdrlen) return -EINVAL; - if (h->nlmsg_flags & NLM_F_DUMP) { + if (h->nlmsg_type == SOCK_DIAG_BY_FAMILY && + h->nlmsg_flags & NLM_F_DUMP) { if (nlmsg_attrlen(h, hdrlen)) { struct nlattr *attr; attr = nlmsg_find_attr(h, hdrlen, @@ -1107,17 +1136,20 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) } } - return inet_diag_get_exact(skb, h, (struct inet_diag_req_v2 *)NLMSG_DATA(h)); + return inet_diag_cmd_exact(h->nlmsg_type, skb, h, + (struct inet_diag_req_v2 *)NLMSG_DATA(h)); } static struct sock_diag_handler inet_diag_handler = { .family = AF_INET, - .dump = inet_diag_handler_dump, + .dump = inet_diag_handler_cmd, + .destroy = inet_diag_handler_cmd, }; static struct sock_diag_handler inet6_diag_handler = { .family = AF_INET6, - .dump = inet_diag_handler_dump, + .dump = inet_diag_handler_cmd, + .destroy = inet_diag_handler_cmd, }; int inet_diag_register(const struct inet_diag_handler *h) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index af72c9ac88a..2cf31cc7a26 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -256,7 +256,7 @@ int ping_init_sock(struct sock *sk) int ret = 0; if (sk->sk_family == AF_INET6) - inet6_sk(sk)->ipv6only = 1; + inet6_sk(sk)->ipv6only = 1; inet_get_ping_group_range_net(net, range, range+1); if (range[0] <= group && group <= range[1]) @@ -480,8 +480,8 @@ void ping_err(struct sk_buff *skb, int offset, u32 info) int family; struct icmphdr *icmph; struct inet_sock *inet_sock; - int type = icmp_hdr(skb)->type; - int code = icmp_hdr(skb)->code; + int type; + int code; struct net *net = dev_net(skb->dev); struct sock *sk; int harderr; @@ -853,6 +853,8 @@ int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, { struct inet_sock *isk = inet_sk(sk); int family = sk->sk_family; + struct sockaddr_in *sin; + struct sockaddr_in6 *sin6; struct sk_buff *skb; int copied, err; @@ -862,6 +864,13 @@ int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (flags & MSG_OOB) goto out; + if (addr_len) { + if (family == AF_INET) + *addr_len = sizeof(*sin); + else if (family == AF_INET6 && addr_len) + *addr_len = sizeof(*sin6); + } + if (flags & MSG_ERRQUEUE) { if (family == AF_INET) { return ip_recv_error(sk, msg, len, addr_len); @@ -891,14 +900,12 @@ int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, /* Copy the address and add cmsg data. 
*/ if (family == AF_INET) { - if (msg->msg_name) { - struct sockaddr_in *sin = (struct sockaddr_in *) msg->msg_name; - + sin = (struct sockaddr_in *) msg->msg_name; + if (sin) { sin->sin_family = AF_INET; sin->sin_port = 0 /* skb->h.uh->source */; sin->sin_addr.s_addr = ip_hdr(skb)->saddr; memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - *addr_len = sizeof(*sin); } if (isk->cmsg_flags) @@ -908,22 +915,19 @@ int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } else if (family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6hdr *ip6 = ipv6_hdr(skb); + sin6 = (struct sockaddr_in6 *) msg->msg_name; - if (msg->msg_name) { - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name; - + if (sin6) { sin6->sin6_family = AF_INET6; sin6->sin6_port = 0; sin6->sin6_addr = ip6->saddr; - sin6->sin6_flowinfo = 0; if (np->sndflow) sin6->sin6_flowinfo = *(__be32 *)ip6 & IPV6_FLOWINFO_MASK; - - sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, - IP6CB(skb)->iif); - *addr_len = sizeof(*sin6); + sin6->sin6_scope_id = + ipv6_iface_scope_id(&sin6->sin6_addr, + IP6CB(skb)->iif); } if (inet6_sk(sk)->rxopt.all) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 87a79c673ff..f4278d0acb3 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3271,6 +3271,43 @@ void tcp_done(struct sock *sk) } EXPORT_SYMBOL_GPL(tcp_done); +int tcp_abort(struct sock *sk, int err) +{ + if (sk->sk_state == TCP_TIME_WAIT) { + inet_twsk_put((struct inet_timewait_sock *)sk); + return -EOPNOTSUPP; + } + + /* Don't race with userspace socket closes such as tcp_close. */ + lock_sock(sk); + + if (sk->sk_state == TCP_LISTEN) { + tcp_set_state(sk, TCP_CLOSE); + inet_csk_listen_stop(sk); + } + + /* Don't race with BH socket closes such as inet_csk_listen_stop. */ + local_bh_disable(); + bh_lock_sock(sk); + + if (!sock_flag(sk, SOCK_DEAD)) { + sk->sk_err = err; + /* This barrier is coupled with smp_rmb() in tcp_poll() */ + smp_wmb(); + sk->sk_error_report(sk); + if (tcp_need_reset(sk->sk_state)) + tcp_send_active_reset(sk, GFP_ATOMIC); + tcp_done(sk); + } + + bh_unlock_sock(sk); + local_bh_enable(); + release_sock(sk); + sock_put(sk); + return 0; +} +EXPORT_SYMBOL_GPL(tcp_abort); + extern struct tcp_congestion_ops tcp_reno; static __initdata unsigned long thash_entries; @@ -3385,22 +3422,26 @@ void __init tcp_init(void) static int tcp_is_local(struct net *net, __be32 addr) { struct rtable *rt; struct flowi4 fl4 = { .daddr = addr }; - int res = 0; + int is_local; rt = ip_route_output_key(net, &fl4); if (IS_ERR_OR_NULL(rt)) return 0; - res = rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK); - /* Arp_cache entry increase to 1024 whenever WIFI <-> LTE(with CMC22x Modem). 
- So dst_release() is needed to release undestroy dst_entry */ - dst_release(&rt->dst); - return res; + is_local = rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK); + ip_rt_put(rt); + return is_local; } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) static int tcp_is_local6(struct net *net, struct in6_addr *addr) { struct rt6_info *rt6 = rt6_lookup(net, addr, addr, 0, 0); - return rt6 && rt6->dst.dev && (rt6->dst.dev->flags & IFF_LOOPBACK); + int is_local; + if (rt6 == NULL) + return 0; + + is_local = rt6->dst.dev && (rt6->dst.dev->flags & IFF_LOOPBACK); + dst_release(&rt6->dst); + return is_local; } #endif @@ -3428,7 +3469,7 @@ int tcp_nuke_addr(struct net *net, struct sockaddr *addr) return -EAFNOSUPPORT; } - for (bucket = 0; bucket < tcp_hashinfo.ehash_mask; bucket++) { + for (bucket = 0; bucket <= tcp_hashinfo.ehash_mask; bucket++) { struct hlist_nulls_node *node; struct sock *sk; spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, bucket); diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index ed3f2ad42e0..a56461c21b9 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -11,6 +11,8 @@ #include +#include +#include #include #include @@ -46,11 +48,28 @@ static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req); } +#ifdef CONFIG_INET_DIAG_DESTROY +static int tcp_diag_destroy(struct sk_buff *in_skb, + struct inet_diag_req_v2 *req) +{ + struct net *net = sock_net(in_skb->sk); + struct sock *sk = inet_diag_find_one_icsk(net, &tcp_hashinfo, req); + + if (IS_ERR(sk)) + return PTR_ERR(sk); + + return sock_diag_destroy(sk, ECONNABORTED); +} +#endif + static const struct inet_diag_handler tcp_diag_handler = { .dump = tcp_diag_dump, .dump_one = tcp_diag_dump_one, .idiag_get_info = tcp_diag_get_info, .idiag_type = IPPROTO_TCP, +#ifdef CONFIG_INET_DIAG_DESTROY + .destroy = tcp_diag_destroy, +#endif }; static int __init tcp_diag_init(void) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index bb0be7736e7..55a95e33f80 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2665,6 +2665,7 @@ struct proto tcp_prot = { .destroy_cgroup = tcp_destroy_cgroup, .proto_cgroup = tcp_proto_cgroup, #endif + .diag_destroy = tcp_abort, }; EXPORT_SYMBOL(tcp_prot); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2fe1c054f3d..c680d14b60d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -202,6 +202,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .accept_dad = 1, .accept_ra_prefix_route = 1, .accept_ra_mtu = 1, + .use_oif_addrs_only = 0, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -239,6 +240,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .accept_dad = 1, .accept_ra_prefix_route = 1, .accept_ra_mtu = 1, + .use_oif_addrs_only = 0, }; /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */ @@ -968,6 +970,9 @@ enum { #endif IPV6_SADDR_RULE_ORCHID, IPV6_SADDR_RULE_PREFIX, +#ifdef CONFIG_IPV6_OPTIMISTIC_DAD + IPV6_SADDR_RULE_NOT_OPTIMISTIC, +#endif IPV6_SADDR_RULE_MAX }; @@ -995,6 +1000,15 @@ static inline int ipv6_saddr_preferred(int type) return 0; } +static inline bool ipv6_use_optimistic_addr(struct inet6_dev *idev) +{ +#ifdef CONFIG_IPV6_OPTIMISTIC_DAD + return idev && idev->cnf.optimistic_dad && idev->cnf.use_optimistic; +#else + return false; +#endif +} + static int ipv6_get_saddr_eval(struct net *net, struct ipv6_saddr_score *score, struct ipv6_saddr_dst *dst, @@ -1055,10 +1069,16 @@ static int 
ipv6_get_saddr_eval(struct net *net, score->scopedist = ret; break; case IPV6_SADDR_RULE_PREFERRED: + { /* Rule 3: Avoid deprecated and optimistic addresses */ + u8 avoid = IFA_F_DEPRECATED; + + if (!ipv6_use_optimistic_addr(score->ifa->idev)) + avoid |= IFA_F_OPTIMISTIC; ret = ipv6_saddr_preferred(score->addr_type) || - !(score->ifa->flags & (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)); + !(score->ifa->flags & avoid); break; + } #ifdef CONFIG_IPV6_MIP6 case IPV6_SADDR_RULE_HOA: { @@ -1104,6 +1124,14 @@ static int ipv6_get_saddr_eval(struct net *net, score->matchlen = ret = ipv6_addr_diff(&score->ifa->addr, dst->addr); break; +#ifdef CONFIG_IPV6_OPTIMISTIC_DAD + case IPV6_SADDR_RULE_NOT_OPTIMISTIC: + /* Optimistic addresses still have lower precedence than other + * preferred addresses. + */ + ret = !(score->ifa->flags & IFA_F_OPTIMISTIC); + break; +#endif default: ret = 0; } @@ -1151,9 +1179,15 @@ int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev, * include addresses assigned to interfaces * belonging to the same site as the outgoing * interface.) + * - "It is RECOMMENDED that the candidate source addresses + * be the set of unicast addresses assigned to the + * interface that will be used to send to the destination + * (the 'outgoing' interface)." (RFC 6724) */ + idev = dst_dev ? __in6_dev_get(dst_dev) : NULL; if (((dst_type & IPV6_ADDR_MULTICAST) || - dst.scope <= IPV6_ADDR_SCOPE_LINKLOCAL) && + dst.scope <= IPV6_ADDR_SCOPE_LINKLOCAL || + (idev && idev->cnf.use_oif_addrs_only)) && dst.ifindex && dev->ifindex != dst.ifindex) continue; @@ -1300,7 +1334,9 @@ int ipv6_chk_addr(struct net *net, const struct in6_addr *addr, if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr) && - !(ifp->flags&IFA_F_TENTATIVE) && + (!(ifp->flags&IFA_F_TENTATIVE) || + (ipv6_use_optimistic_addr(ifp->idev) && + ifp->flags&IFA_F_OPTIMISTIC)) && (dev == NULL || ifp->idev->dev == dev || !(ifp->scope&(IFA_LINK|IFA_HOST) || strict))) { rcu_read_unlock_bh(); @@ -3065,8 +3101,15 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags) * Optimistic nodes can start receiving * Frames right away */ - if (ifp->flags & IFA_F_OPTIMISTIC) + if (ifp->flags & IFA_F_OPTIMISTIC) { ip6_ins_rt(ifp->rt); + if (ipv6_use_optimistic_addr(idev)) { + /* Because optimistic nodes can use this address, + * notify listeners. If DAD fails, RTM_DELADDR is sent. 
+ */ + ipv6_ifa_notify(RTM_NEWADDR, ifp); + } + } addrconf_dad_kick(ifp); out: @@ -4017,6 +4060,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD array[DEVCONF_OPTIMISTIC_DAD] = cnf->optimistic_dad; + array[DEVCONF_USE_OPTIMISTIC] = cnf->use_optimistic; #endif #ifdef CONFIG_IPV6_MROUTE array[DEVCONF_MC_FORWARDING] = cnf->mc_forwarding; @@ -4025,6 +4069,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad; array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao; array[DEVCONF_ACCEPT_RA_MTU] = cnf->accept_ra_mtu; + array[DEVCONF_USE_OIF_ADDRS_ONLY] = cnf->use_oif_addrs_only; } static inline size_t inet6_ifla6_size(void) @@ -4687,6 +4732,14 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, + { + .procname = "use_optimistic", + .data = &ipv6_devconf.use_optimistic, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + + }, #endif #ifdef CONFIG_IPV6_MROUTE { @@ -4732,6 +4785,14 @@ static struct addrconf_sysctl_table .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "use_oif_addrs_only", + .data = &ipv6_devconf.use_oif_addrs_only, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + + }, { /* sentinel */ } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index eba639fd665..b0f8c114821 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -454,8 +454,11 @@ void inet6_destroy_sock(struct sock *sk) /* Free tx options */ - if ((opt = xchg(&np->opt, NULL)) != NULL) - sock_kfree_s(sk, opt, opt->tot_len); + opt = xchg((__force struct ipv6_txoptions **)&np->opt, NULL); + if (opt) { + atomic_sub(opt->tot_len, &sk->sk_omem_alloc); + txopt_put(opt); + } } EXPORT_SYMBOL_GPL(inet6_destroy_sock); @@ -711,7 +714,10 @@ int inet6_sk_rebuild_header(struct sock *sk) fl6.flowi6_uid = sock_i_uid(sk); security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - final_p = fl6_update_dst(&fl6, np->opt, &final); + rcu_read_lock(); + final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), + &final); + rcu_read_unlock(); dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); if (IS_ERR(dst)) { diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index a21a2ecb0ad..3da4b974449 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -167,8 +167,10 @@ ipv4_connected: security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - opt = flowlabel ? flowlabel->opt : np->opt; + rcu_read_lock(); + opt = flowlabel ? 
flowlabel->opt : rcu_dereference(np->opt); final_p = fl6_update_dst(&fl6, opt, &final); + rcu_read_unlock(); dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); err = 0; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 3d641b6e9b0..e66773850e5 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -748,6 +748,7 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt) *((char**)&opt2->dst1opt) += dif; if (opt2->srcrt) *((char**)&opt2->srcrt) += dif; + atomic_set(&opt2->refcnt, 1); } return opt2; } @@ -812,7 +813,7 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, return ERR_PTR(-ENOBUFS); memset(opt2, 0, tot_len); - + atomic_set(&opt2->refcnt, 1); opt2->tot_len = tot_len; p = (char *)(opt2 + 1); diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index aefc8b71809..67aa2c2b502 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -66,7 +66,9 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_TCP; fl6.daddr = treq->rmt_addr; - final_p = fl6_update_dst(&fl6, np->opt, &final); + rcu_read_lock(); + final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final); + rcu_read_unlock(); fl6.saddr = treq->loc_addr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = inet_rsk(req)->ir_mark; @@ -227,7 +229,9 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) fl6.flowi6_uid = sock_i_uid(sk); security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - final_p = fl6_update_dst(&fl6, np->opt, &final); + rcu_read_lock(); + final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final); + rcu_read_unlock(); dst = __inet6_csk_dst_check(sk, np->dst_cookie); @@ -250,7 +254,8 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) /* Restore final destination back after routing done */ fl6.daddr = np->daddr; - res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass); + res = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt), + np->tclass); rcu_read_unlock(); return res; } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 34c1109d346..c12a68a7f5e 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -110,10 +110,12 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk, icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen; icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); } - opt = xchg(&inet6_sk(sk)->opt, opt); + opt = xchg((__force struct ipv6_txoptions **)&inet6_sk(sk)->opt, + opt); } else { spin_lock(&sk->sk_dst_lock); - opt = xchg(&inet6_sk(sk)->opt, opt); + opt = xchg((__force struct ipv6_txoptions **)&inet6_sk(sk)->opt, + opt); spin_unlock(&sk->sk_dst_lock); } sk_dst_reset(sk); @@ -213,9 +215,12 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, sk->sk_socket->ops = &inet_dgram_ops; sk->sk_family = PF_INET; } - opt = xchg(&np->opt, NULL); - if (opt) - sock_kfree_s(sk, opt, opt->tot_len); + opt = xchg((__force struct ipv6_txoptions **)&np->opt, + NULL); + if (opt) { + atomic_sub(opt->tot_len, &sk->sk_omem_alloc); + txopt_put(opt); + } pktopt = xchg(&np->pktoptions, NULL); kfree_skb(pktopt); @@ -384,7 +389,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (optname != IPV6_RTHDR && !capable(CAP_NET_RAW)) break; - opt = ipv6_renew_options(sk, np->opt, optname, + opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + opt = ipv6_renew_options(sk, opt, optname, (struct ipv6_opt_hdr __user *)optval, optlen); if 
(IS_ERR(opt)) { @@ -413,8 +419,10 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, retv = 0; opt = ipv6_update_options(sk, opt); sticky_done: - if (opt) - sock_kfree_s(sk, opt, opt->tot_len); + if (opt) { + atomic_sub(opt->tot_len, &sk->sk_omem_alloc); + txopt_put(opt); + } break; } @@ -467,6 +475,7 @@ sticky_done: break; memset(opt, 0, sizeof(*opt)); + atomic_set(&opt->refcnt, 1); opt->tot_len = sizeof(*opt) + optlen; retv = -EFAULT; if (copy_from_user(opt+1, optval, optlen)) @@ -483,8 +492,10 @@ update: retv = 0; opt = ipv6_update_options(sk, opt); done: - if (opt) - sock_kfree_s(sk, opt, opt->tot_len); + if (opt) { + atomic_sub(opt->tot_len, &sk->sk_omem_alloc); + txopt_put(opt); + } break; } case IPV6_UNICAST_HOPS: @@ -1085,10 +1096,11 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, case IPV6_RTHDR: case IPV6_DSTOPTS: { + struct ipv6_txoptions *opt; lock_sock(sk); - len = ipv6_getsockopt_sticky(sk, np->opt, - optname, optval, len); + opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + len = ipv6_getsockopt_sticky(sk, opt, optname, optval, len); release_sock(sk); /* check if ipv6_getsockopt_sticky() returns err code */ if (len < 0) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 187b1419440..53897a93f58 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1273,8 +1273,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) if (in6_dev->cnf.hop_limit < ra_msg->icmph.icmp6_hop_limit) { in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit; } else { - ND_PRINTK2(KERN_WARNING - "RA: Got route advertisement with lower hop_limit than current\n"); + ND_PRINTK2(KERN_WARNING, "RA: Got route advertisement with lower hop_limit than current\n"); } if (rt) dst_metric_set(&rt->dst, RTAX_HOPLIMIT, diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index b2de91f8937..7a0dfe0d2f6 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -727,6 +727,7 @@ static int rawv6_probe_proto_opt(struct flowi6 *fl6, struct msghdr *msg) static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) { + struct ipv6_txoptions *opt_to_free = NULL; struct ipv6_txoptions opt_space; struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name; struct in6_addr *daddr, *final_p, final; @@ -834,8 +835,10 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (!(opt->opt_nflen|opt->opt_flen)) opt = NULL; } - if (opt == NULL) - opt = np->opt; + if (!opt) { + opt = txopt_get(np); + opt_to_free = opt; + } if (flowlabel) opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); @@ -902,6 +905,7 @@ done: dst_release(dst); out: fl6_sock_release(flowlabel); + txopt_put(opt_to_free); return err<0?err:len; do_confirm: dst_confirm(dst); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index bd946074949..f5d3bf3b380 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2651,6 +2651,9 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void if (tb[RTA_OIF]) oif = nla_get_u32(tb[RTA_OIF]); + if (tb[RTA_MARK]) + fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]); + if (tb[RTA_UID]) fl6.flowi6_uid = nla_get_u32(tb[RTA_UID]); else diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index af939afeae2..b57996a4fd6 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -240,7 +240,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_TCP; fl6.daddr = ireq6->rmt_addr; - 
final_p = fl6_update_dst(&fl6, np->opt, &final); + final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final); fl6.saddr = ireq6->loc_addr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = inet_rsk(req)->ir_mark; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index a18d9368016..e5f64991ce0 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -132,6 +132,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); struct in6_addr *saddr = NULL, *final_p, final; + struct ipv6_txoptions *opt; struct rt6_info *rt; struct flowi6 fl6; struct dst_entry *dst; @@ -253,7 +254,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.fl6_sport = inet->inet_sport; fl6.flowi6_uid = sock_i_uid(sk); - final_p = fl6_update_dst(&fl6, np->opt, &final); + opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + final_p = fl6_update_dst(&fl6, opt, &final); security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); @@ -296,9 +298,9 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, } icsk->icsk_ext_hdr_len = 0; - if (np->opt) - icsk->icsk_ext_hdr_len = (np->opt->opt_flen + - np->opt->opt_nflen); + if (opt) + icsk->icsk_ext_hdr_len = opt->opt_flen + + opt->opt_nflen; tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); @@ -516,7 +518,8 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr); fl6.daddr = treq->rmt_addr; - err = ip6_xmit(sk, skb, &fl6, opt, np->tclass); + err = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt), + np->tclass); err = net_xmit_eval(err); } @@ -1243,10 +1246,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, struct inet6_request_sock *treq; struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct tcp6_sock *newtcp6sk; + struct ipv6_txoptions *opt; struct inet_sock *newinet; struct tcp_sock *newtp; struct sock *newsk; - struct ipv6_txoptions *opt; #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *key; #endif @@ -1376,16 +1379,15 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, but we make one more one thing there: reattach optmem to newsk. 
*/ + opt = rcu_dereference(np->opt); if (opt) { - newnp->opt = ipv6_dup_options(newsk, opt); - if (opt != np->opt) - sock_kfree_s(sk, opt, opt->tot_len); + opt = ipv6_dup_options(newsk, opt); + RCU_INIT_POINTER(newnp->opt, opt); } - inet_csk(newsk)->icsk_ext_hdr_len = 0; - if (newnp->opt) - inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + - newnp->opt->opt_flen); + if (opt) + inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen + + opt->opt_flen; tcp_mtup_init(newsk); tcp_sync_mss(newsk, dst_mtu(dst)); @@ -2138,6 +2140,7 @@ struct proto tcpv6_prot = { .proto_cgroup = tcp_proto_cgroup, #endif .clear_sk = tcp_v6_clear_sk, + .diag_destroy = tcp_abort, }; static const struct inet6_protocol tcpv6_protocol = { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 5809231c1ef..d3e170e9fad 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -959,6 +959,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name; struct in6_addr *daddr, *final_p, final; struct ipv6_txoptions *opt = NULL; + struct ipv6_txoptions *opt_to_free = NULL; struct ip6_flowlabel *flowlabel = NULL; struct flowi6 fl6; struct dst_entry *dst; @@ -1113,8 +1114,10 @@ do_udp_sendmsg: opt = NULL; connected = 0; } - if (opt == NULL) - opt = np->opt; + if (!opt) { + opt = txopt_get(np); + opt_to_free = opt; + } if (flowlabel) opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); @@ -1215,6 +1218,7 @@ do_append_data: out: dst_release(dst); fl6_sock_release(flowlabel); + txopt_put(opt_to_free); if (!err) return len; /* diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index b519ef0dc45..21ef6a153f8 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -2599,7 +2599,8 @@ static int pp_stats_line(struct proc_print_info *ppi, int cnt_set) tag_t tag = ppi->ts_entry->tn.tag; uid_t stat_uid = get_uid_from_tag(tag); /* Detailed tags are not available to everybody */ - if (!can_read_other_uid_stats(stat_uid)) { + if (get_atag_from_tag(tag) + && !can_read_other_uid_stats(stat_uid)) { CT_DEBUG("qtaguid: stats line: " "%s 0x%llx %u: insufficient priv " "from pid=%u tgid=%u uid=%u stats.gid=%u\n", diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index c3930172c06..b50336b2bd1 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -235,7 +235,7 @@ static int rpc_wait_bit_killable(void *word) { if (fatal_signal_pending(current)) return -ERESTARTSYS; - freezable_schedule(); + freezable_schedule_unsafe(); return 0; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index a6dada4df13..cc60659f3de 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -114,6 +114,7 @@ #include #include #include +#include struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; EXPORT_SYMBOL_GPL(unix_socket_table); @@ -2055,8 +2056,12 @@ static long unix_stream_data_wait(struct sock *sk, long timeo) set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); unix_state_unlock(sk); - timeo = schedule_timeout(timeo); + timeo = freezable_schedule_timeout(timeo); unix_state_lock(sk); + + if (sock_flag(sk, SOCK_DEAD)) + break; + clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); } @@ -2120,6 +2125,10 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, struct sk_buff *skb; unix_state_lock(sk); + if (sock_flag(sk, SOCK_DEAD)) { + err = -ECONNRESET; + goto unlock; + } skb = skb_peek(&sk->sk_receive_queue); again: if (skb == NULL) { diff --git a/net/wireless/scan.c b/net/wireless/scan.c 
index d1255bf9067..563c2e7e056 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -18,7 +18,7 @@ #include "nl80211.h" #include "wext-compat.h" -#define IEEE80211_SCAN_RESULT_EXPIRE (6 * HZ) +#define IEEE80211_SCAN_RESULT_EXPIRE (7 * HZ) void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak) { diff --git a/scripts/Makefile.modinst b/scripts/Makefile.modinst index efa5d940e63..24c62a39212 100644 --- a/scripts/Makefile.modinst +++ b/scripts/Makefile.modinst @@ -23,7 +23,7 @@ quiet_cmd_modules_install = INSTALL $@ INSTALL_MOD_DIR ?= extra ext-mod-dir = $(INSTALL_MOD_DIR)$(subst $(patsubst %/,%,$(KBUILD_EXTMOD)),,$(@D)) -modinst_dir = $(if $(KBUILD_EXTMOD),$(ext-mod-dir),kernel/$(@D)) +modinst_dir ?= $(if $(KBUILD_EXTMOD),$(ext-mod-dir),kernel/$(@D)) $(modules): $(call cmd,modules_install,$(MODLIB)/$(modinst_dir)) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 11fb4cff93b..79d6058c1d6 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -700,7 +700,12 @@ static int selinux_set_mnt_opts(struct super_block *sb, } if (strcmp(sb->s_type->name, "proc") == 0) - sbsec->flags |= SE_SBPROC; + sbsec->flags |= SE_SBPROC | SE_SBGENFS; + + if (!strcmp(sb->s_type->name, "debugfs") || + !strcmp(sb->s_type->name, "sysfs") || + !strcmp(sb->s_type->name, "pstore")) + sbsec->flags |= SE_SBGENFS; /* Determine the labeling behavior to use for this filesystem type. */ rc = security_fs_use((sbsec->flags & SE_SBPROC) ? "proc" : sb->s_type->name, &sbsec->behavior, &sbsec->sid); @@ -1154,12 +1159,13 @@ static inline u16 socket_type_to_security_class(int family, int type, int protoc return SECCLASS_SOCKET; } -#ifdef CONFIG_PROC_FS -static int selinux_proc_get_sid(struct dentry *dentry, - u16 tclass, - u32 *sid) +static int selinux_genfs_get_sid(struct dentry *dentry, + u16 tclass, + u16 flags, + u32 *sid) { int rc; + struct super_block *sb = dentry->d_inode->i_sb; char *buffer, *path; buffer = (char *)__get_free_page(GFP_KERNEL); @@ -1170,26 +1176,20 @@ static int selinux_proc_get_sid(struct dentry *dentry, if (IS_ERR(path)) rc = PTR_ERR(path); else { - /* each process gets a /proc/PID/ entry. Strip off the - * PID part to get a valid selinux labeling. - * e.g. /proc/1/net/rpc/nfs -> /net/rpc/nfs */ - while (path[1] >= '0' && path[1] <= '9') { - path[1] = '/'; - path++; + if (flags & SE_SBPROC) { + /* each process gets a /proc/PID/ entry. Strip off the + * PID part to get a valid selinux labeling. + * e.g. /proc/1/net/rpc/nfs -> /net/rpc/nfs */ + while (path[1] >= '0' && path[1] <= '9') { + path[1] = '/'; + path++; + } } - rc = security_genfs_sid("proc", path, tclass, sid); + rc = security_genfs_sid(sb->s_type->name, path, tclass, sid); } free_page((unsigned long)buffer); return rc; } -#else -static int selinux_proc_get_sid(struct dentry *dentry, - u16 tclass, - u32 *sid) -{ - return -EINVAL; -} -#endif /* The inode's security attributes must be initialized before first use. */ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dentry) @@ -1344,7 +1344,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent /* Default to the fs superblock SID. 
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 11fb4cff93b..79d6058c1d6 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -700,7 +700,12 @@ static int selinux_set_mnt_opts(struct super_block *sb,
 	}

 	if (strcmp(sb->s_type->name, "proc") == 0)
-		sbsec->flags |= SE_SBPROC;
+		sbsec->flags |= SE_SBPROC | SE_SBGENFS;
+
+	if (!strcmp(sb->s_type->name, "debugfs") ||
+	    !strcmp(sb->s_type->name, "sysfs") ||
+	    !strcmp(sb->s_type->name, "pstore"))
+		sbsec->flags |= SE_SBGENFS;

 	/* Determine the labeling behavior to use for this filesystem type. */
 	rc = security_fs_use((sbsec->flags & SE_SBPROC) ? "proc" : sb->s_type->name,
 			     &sbsec->behavior, &sbsec->sid);
@@ -1154,12 +1159,13 @@ static inline u16 socket_type_to_security_class(int family, int type, int protoc
 	return SECCLASS_SOCKET;
 }

-#ifdef CONFIG_PROC_FS
-static int selinux_proc_get_sid(struct dentry *dentry,
-				u16 tclass,
-				u32 *sid)
+static int selinux_genfs_get_sid(struct dentry *dentry,
+				 u16 tclass,
+				 u16 flags,
+				 u32 *sid)
 {
 	int rc;
+	struct super_block *sb = dentry->d_inode->i_sb;
 	char *buffer, *path;

 	buffer = (char *)__get_free_page(GFP_KERNEL);
@@ -1170,26 +1176,20 @@ static int selinux_proc_get_sid(struct dentry *dentry,
 	if (IS_ERR(path))
 		rc = PTR_ERR(path);
 	else {
-		/* each process gets a /proc/PID/ entry. Strip off the
-		 * PID part to get a valid selinux labeling.
-		 * e.g. /proc/1/net/rpc/nfs -> /net/rpc/nfs */
-		while (path[1] >= '0' && path[1] <= '9') {
-			path[1] = '/';
-			path++;
+		if (flags & SE_SBPROC) {
+			/* each process gets a /proc/PID/ entry. Strip off the
+			 * PID part to get a valid selinux labeling.
+			 * e.g. /proc/1/net/rpc/nfs -> /net/rpc/nfs */
+			while (path[1] >= '0' && path[1] <= '9') {
+				path[1] = '/';
+				path++;
+			}
 		}
-		rc = security_genfs_sid("proc", path, tclass, sid);
+		rc = security_genfs_sid(sb->s_type->name, path, tclass, sid);
 	}
 	free_page((unsigned long)buffer);
 	return rc;
 }
-#else
-static int selinux_proc_get_sid(struct dentry *dentry,
-				u16 tclass,
-				u32 *sid)
-{
-	return -EINVAL;
-}
-#endif

 /* The inode's security attributes must be initialized before first use. */
 static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dentry)
@@ -1344,7 +1344,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
 		/* Default to the fs superblock SID. */
 		isec->sid = sbsec->sid;

-		if ((sbsec->flags & SE_SBPROC) && !S_ISLNK(inode->i_mode)) {
+		if ((sbsec->flags & SE_SBGENFS) && !S_ISLNK(inode->i_mode)) {
 			/* We must have a dentry to determine the label on
 			 * procfs inodes */
 			if (opt_dentry)
@@ -1367,7 +1367,8 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
 			if (!dentry)
 				goto out_unlock;
 			isec->sclass = inode_mode_to_security_class(inode->i_mode);
-			rc = selinux_proc_get_sid(dentry, isec->sclass, &sid);
+			rc = selinux_genfs_get_sid(dentry, isec->sclass,
+						   sbsec->flags, &sid);
 			dput(dentry);
 			if (rc)
 				goto out_unlock;
diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h
index 9008e23baeb..20b00fc37cc 100644
--- a/security/selinux/include/classmap.h
+++ b/security/selinux/include/classmap.h
@@ -145,8 +145,9 @@ struct security_class_mapping secclass_map[] = {
 	  "node_bind", "name_connect", NULL } },
 	{ "memprotect", { "mmap_zero", NULL } },
 	{ "peer", { "recv", NULL } },
-	{ "capability2", { "mac_override", "mac_admin", "syslog",
-			   "wake_alarm", "block_suspend", NULL } },
+	{ "capability2",
+	  { "mac_override", "mac_admin", "syslog", "wake_alarm", "block_suspend",
+	    NULL } },
 	{ "kernel_service", { "use_as_override", "create_files_as", NULL } },
 	{ "tun_socket", { COMMON_SOCK_PERMS, NULL } },
diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
index 2d7eee7dcde..9d8eb323d24 100644
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -55,6 +55,7 @@
 #define SE_SBINITIALIZED	0x10
 #define SE_SBPROC		0x20
 #define SE_SBLABELSUPP		0x40
+#define SE_SBGENFS		0x80

 #define CONTEXT_STR	"context="
 #define FSCONTEXT_STR	"fscontext="
diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c
index 93f8d749037..eec10b5d912 100644
--- a/security/selinux/nlmsgtab.c
+++ b/security/selinux/nlmsgtab.c
@@ -107,6 +107,7 @@ static struct nlmsg_perm nlmsg_xfrm_perms[] =
 	{ XFRM_MSG_MIGRATE,	NETLINK_XFRM_SOCKET__NLMSG_WRITE },
 	{ XFRM_MSG_NEWSADINFO,	NETLINK_XFRM_SOCKET__NLMSG_READ  },
 	{ XFRM_MSG_GETSADINFO,	NETLINK_XFRM_SOCKET__NLMSG_READ  },
+	{ XFRM_MSG_NEWSPDINFO,	NETLINK_XFRM_SOCKET__NLMSG_WRITE },
 	{ XFRM_MSG_GETSPDINFO,	NETLINK_XFRM_SOCKET__NLMSG_READ  },
 	{ XFRM_MSG_MAPPING,	NETLINK_XFRM_SOCKET__NLMSG_READ  },
 };
diff --git a/sound/core/timer.c b/sound/core/timer.c
index 5dba0deb10c..025cafc65b8 100644
--- a/sound/core/timer.c
+++ b/sound/core/timer.c
@@ -1822,7 +1822,7 @@ static long __snd_timer_user_ioctl(struct file *file, unsigned int cmd,
 	{
 		int xarg;

-		if (tu->timeri)
+		if (tu->timeri)	/* too late */
 			return -EBUSY;
 		if (get_user(xarg, p))
 			return -EFAULT;
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 7dcfab8f432..f686f96ab69 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -1580,7 +1580,7 @@ static int snd_soc_instantiate_card(struct snd_soc_card *card)
 	struct snd_soc_dai_link *dai_link;
 	int ret, i, order;

-	mutex_lock(&card->mutex);
+	mutex_lock_nested(&card->mutex, SND_SOC_CARD_CLASS_INIT);

 	/* bind DAIs */
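Note: the soc-core.c hunk above takes card->mutex with mutex_lock_nested()
because lockdep keys locks by their initialization site; when two locks of the
same class are legitimately held at once, an explicit subclass is needed to
suppress a false self-deadlock report. A generic sketch of the annotation
(illustrative names, not the ASoC code):

  #include <linux/mutex.h>

  struct node {
  	struct mutex lock;	/* all node locks share one lockdep class */
  	struct node *parent;
  };

  static void node_lock_with_parent(struct node *n)
  {
  	mutex_lock(&n->parent->lock);
  	/*
  	 * A second acquisition of the same lock class would normally
  	 * look like a deadlock to lockdep; the subclass marks this
  	 * nesting as intentional and consistently ordered.
  	 */
  	mutex_lock_nested(&n->lock, SINGLE_DEPTH_NESTING);

  	/* ... work that needs both locks ... */

  	mutex_unlock(&n->lock);
  	mutex_unlock(&n->parent->lock);
  }

The same idea underlies the SND_SOC_DAPM_CLASS_INIT subclass visible in the
soc-dapm.c hunks that follow.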
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index d8a22c07e11..ff548d99dbb 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -2368,7 +2368,7 @@ err:
 int snd_soc_dapm_add_routes(struct snd_soc_dapm_context *dapm,
 			    const struct snd_soc_dapm_route *route, int num)
 {
-	int i, ret;
+	int i, ret = 0;

 	mutex_lock_nested(&dapm->card->dapm_mutex, SND_SOC_DAPM_CLASS_INIT);
 	for (i = 0; i < num; i++) {
@@ -2376,14 +2376,13 @@ int snd_soc_dapm_add_routes(struct snd_soc_dapm_context *dapm,
 		if (ret < 0) {
 			dev_err(dapm->dev, "Failed to add route %s->%s\n",
 				route->source, route->sink);
-			mutex_unlock(&dapm->card->dapm_mutex);
-			return ret;
+			break;
 		}
 		route++;
 	}
 	mutex_unlock(&dapm->card->dapm_mutex);

-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(snd_soc_dapm_add_routes);

@@ -3077,7 +3076,8 @@ int snd_soc_dapm_new_controls(struct snd_soc_dapm_context *dapm,
 	const struct snd_soc_dapm_widget *widget,
 	int num)
 {
-	int i, ret;
+	int i;
+	int ret = 0;

 	mutex_lock_nested(&dapm->card->dapm_mutex, SND_SOC_DAPM_CLASS_INIT);
 	for (i = 0; i < num; i++) {
@@ -3086,13 +3086,13 @@ int snd_soc_dapm_new_controls(struct snd_soc_dapm_context *dapm,
 			dev_err(dapm->dev,
 				"ASoC: Failed to create DAPM control %s: %d\n",
 				widget->name, ret);
-			mutex_unlock(&dapm->card->dapm_mutex);
-			return ret;
+			ret = -ENOMEM;
+			break;
 		}
 		widget++;
 	}
 	mutex_unlock(&dapm->card->dapm_mutex);

-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(snd_soc_dapm_new_controls);
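Note: both soc-dapm.c hunks above convert early unlock-and-return error exits
into a single-exit shape: ret starts at 0, a failure breaks out of the loop,
and every path funnels through one mutex_unlock() before returning ret. A
minimal sketch of the pattern, with a hypothetical ctx/add_one standing in for
the ASoC types and helpers:

  #include <linux/mutex.h>

  struct ctx {
  	struct mutex lock;
  	/* ... */
  };

  /* Hypothetical per-item helper; returns 0 or a negative errno. */
  static int add_one(struct ctx *c, int item);

  static int add_items(struct ctx *c, const int *items, int num)
  {
  	int i, ret = 0;

  	mutex_lock(&c->lock);
  	for (i = 0; i < num; i++) {
  		ret = add_one(c, items[i]);
  		if (ret < 0)
  			break;	/* fall through to the single unlock */
  	}
  	mutex_unlock(&c->lock);

  	return ret;
  }

Keeping one unlock site makes it much harder for a future early return to
leave the mutex held.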