diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 17cf65c94804..91dc4bb13032 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -844,4 +844,5 @@ ia32_sys_call_table: .quad compat_sys_recvmmsg .quad sys_fanotify_init .quad sys32_fanotify_mark + .quad sys_prlimit64 ia32_syscall_end: diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index 80b799cd74f7..b766a5e8ba0e 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h @@ -345,10 +345,11 @@ #define __NR_recvmmsg 337 #define __NR_fanotify_init 338 #define __NR_fanotify_mark 339 +#define __NR_prlimit64 340 #ifdef __KERNEL__ -#define NR_syscalls 340 +#define NR_syscalls 341 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index 5b7b1d585616..363e9b8a715b 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -667,6 +667,8 @@ __SYSCALL(__NR_recvmmsg, sys_recvmmsg) __SYSCALL(__NR_fanotify_init, sys_fanotify_init) #define __NR_fanotify_mark 301 __SYSCALL(__NR_fanotify_mark, sys_fanotify_mark) +#define __NR_prlimit64 302 +__SYSCALL(__NR_prlimit64, sys_prlimit64) #ifndef __NO_STUBS #define __ARCH_WANT_OLD_READDIR diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index 07ad5eb7cc5c..4802accb9d8d 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -339,3 +339,4 @@ ENTRY(sys_call_table) .long sys_recvmmsg .long sys_fanotify_init .long sys_fanotify_mark + .long sys_prlimit64 diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h index c17cebc49952..e1898090f22c 100644 --- a/include/asm-generic/unistd.h +++ b/include/asm-generic/unistd.h @@ -640,9 +640,11 @@ __SYSCALL(__NR_recvmmsg, sys_recvmmsg) #define __NR_wait4 260 __SYSCALL(__NR_wait4, sys_wait4) +#define __NR_prlimit64 261 +__SYSCALL(__NR_prlimit64, sys_prlimit64) #undef __NR_syscalls -#define __NR_syscalls 261 +#define __NR_syscalls 262 /* * All syscalls below here should go away really, diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 4f71bf4e628c..3e23844a6990 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -117,6 +117,6 @@ void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx, long clock_nanosleep_restart(struct restart_block *restart_block); -void update_rlimit_cpu(unsigned long rlim_new); +void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new); #endif diff --git a/include/linux/resource.h b/include/linux/resource.h index f1e914eefeab..88d36f9145ba 100644 --- a/include/linux/resource.h +++ b/include/linux/resource.h @@ -43,6 +43,13 @@ struct rlimit { unsigned long rlim_max; }; +#define RLIM64_INFINITY (~0ULL) + +struct rlimit64 { + __u64 rlim_cur; + __u64 rlim_max; +}; + #define PRIO_MIN (-20) #define PRIO_MAX 20 @@ -73,6 +80,8 @@ struct rlimit { struct task_struct; int getrusage(struct task_struct *p, int who, struct rusage __user *ru); +int do_prlimit(struct task_struct *tsk, unsigned int resource, + struct rlimit *new_rlim, struct rlimit *old_rlim); #endif /* __KERNEL__ */ diff --git a/include/linux/security.h b/include/linux/security.h index 5bcb395a49d4..a22219afff09 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1499,7 +1499,8 @@ struct security_operations { int (*task_setnice) (struct task_struct *p, int nice); int (*task_setioprio) (struct task_struct *p, int ioprio); int (*task_getioprio) (struct task_struct *p); - int (*task_setrlimit) (unsigned int resource, struct rlimit *new_rlim); + int (*task_setrlimit) (struct task_struct *p, unsigned int resource, + struct rlimit *new_rlim); int (*task_setscheduler) (struct task_struct *p, int policy, struct sched_param *lp); int (*task_getscheduler) (struct task_struct *p); @@ -1749,7 +1750,8 @@ void security_task_getsecid(struct task_struct *p, u32 *secid); int security_task_setnice(struct task_struct *p, int nice); int security_task_setioprio(struct task_struct *p, int ioprio); int security_task_getioprio(struct task_struct *p); -int security_task_setrlimit(unsigned int resource, struct rlimit *new_rlim); +int security_task_setrlimit(struct task_struct *p, unsigned int resource, + struct rlimit *new_rlim); int security_task_setscheduler(struct task_struct *p, int policy, struct sched_param *lp); int security_task_getscheduler(struct task_struct *p); @@ -2311,7 +2313,8 @@ static inline int security_task_getioprio(struct task_struct *p) return 0; } -static inline int security_task_setrlimit(unsigned int resource, +static inline int security_task_setrlimit(struct task_struct *p, + unsigned int resource, struct rlimit *new_rlim) { return 0; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 2ab198a1e38d..1b67bd333b5e 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -35,6 +35,7 @@ struct oldold_utsname; struct old_utsname; struct pollfd; struct rlimit; +struct rlimit64; struct rusage; struct sched_param; struct sel_arg_struct; @@ -644,6 +645,9 @@ asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *r #endif asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim); +asmlinkage long sys_prlimit64(pid_t pid, unsigned int resource, + const struct rlimit64 __user *new_rlim, + struct rlimit64 __user *old_rlim); asmlinkage long sys_getrusage(int who, struct rusage __user *ru); asmlinkage long sys_umask(int mask); diff --git a/kernel/compat.c b/kernel/compat.c index 5adab05a3172..e167efce8423 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -279,11 +279,6 @@ asmlinkage long compat_sys_setrlimit(unsigned int resource, struct compat_rlimit __user *rlim) { struct rlimit r; - int ret; - mm_segment_t old_fs = get_fs (); - - if (resource >= RLIM_NLIMITS) - return -EINVAL; if (!access_ok(VERIFY_READ, rlim, sizeof(*rlim)) || __get_user(r.rlim_cur, &rlim->rlim_cur) || @@ -294,10 +289,7 @@ asmlinkage long compat_sys_setrlimit(unsigned int resource, r.rlim_cur = RLIM_INFINITY; if (r.rlim_max == COMPAT_RLIM_INFINITY) r.rlim_max = RLIM_INFINITY; - set_fs(KERNEL_DS); - ret = sys_setrlimit(resource, (struct rlimit __user *) &r); - set_fs(old_fs); - return ret; + return do_prlimit(current, resource, &r, NULL); } #ifdef COMPAT_RLIM_OLD_INFINITY @@ -329,16 +321,13 @@ asmlinkage long compat_sys_old_getrlimit(unsigned int resource, #endif -asmlinkage long compat_sys_getrlimit (unsigned int resource, +asmlinkage long compat_sys_getrlimit(unsigned int resource, struct compat_rlimit __user *rlim) { struct rlimit r; int ret; - mm_segment_t old_fs = get_fs(); - set_fs(KERNEL_DS); - ret = sys_getrlimit(resource, (struct rlimit __user *) &r); - set_fs(old_fs); + ret = do_prlimit(current, resource, NULL, &r); if (!ret) { if (r.rlim_cur > COMPAT_RLIM_INFINITY) r.rlim_cur = COMPAT_RLIM_INFINITY; diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index f66bdd33a6c6..6842eeba5879 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -16,13 +16,13 @@ * siglock protection since other code may update expiration cache as * well. */ -void update_rlimit_cpu(unsigned long rlim_new) +void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new) { cputime_t cputime = secs_to_cputime(rlim_new); - spin_lock_irq(¤t->sighand->siglock); - set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL); - spin_unlock_irq(¤t->sighand->siglock); + spin_lock_irq(&task->sighand->siglock); + set_process_cpu_timer(task, CPUCLOCK_PROF, &cputime, NULL); + spin_unlock_irq(&task->sighand->siglock); } static int check_clock(const clockid_t which_clock) diff --git a/kernel/sys.c b/kernel/sys.c index e83ddbbaf89d..e9ad44489828 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1236,15 +1236,14 @@ SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len) SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim) { - if (resource >= RLIM_NLIMITS) - return -EINVAL; - else { - struct rlimit value; - task_lock(current->group_leader); - value = current->signal->rlim[resource]; - task_unlock(current->group_leader); - return copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0; - } + struct rlimit value; + int ret; + + ret = do_prlimit(current, resource, NULL, &value); + if (!ret) + ret = copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0; + + return ret; } #ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT @@ -1272,44 +1271,89 @@ SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource, #endif -SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim) +static inline bool rlim64_is_infinity(__u64 rlim64) { - struct rlimit new_rlim, *old_rlim; - int retval; +#if BITS_PER_LONG < 64 + return rlim64 >= ULONG_MAX; +#else + return rlim64 == RLIM64_INFINITY; +#endif +} + +static void rlim_to_rlim64(const struct rlimit *rlim, struct rlimit64 *rlim64) +{ + if (rlim->rlim_cur == RLIM_INFINITY) + rlim64->rlim_cur = RLIM64_INFINITY; + else + rlim64->rlim_cur = rlim->rlim_cur; + if (rlim->rlim_max == RLIM_INFINITY) + rlim64->rlim_max = RLIM64_INFINITY; + else + rlim64->rlim_max = rlim->rlim_max; +} + +static void rlim64_to_rlim(const struct rlimit64 *rlim64, struct rlimit *rlim) +{ + if (rlim64_is_infinity(rlim64->rlim_cur)) + rlim->rlim_cur = RLIM_INFINITY; + else + rlim->rlim_cur = (unsigned long)rlim64->rlim_cur; + if (rlim64_is_infinity(rlim64->rlim_max)) + rlim->rlim_max = RLIM_INFINITY; + else + rlim->rlim_max = (unsigned long)rlim64->rlim_max; +} + +/* make sure you are allowed to change @tsk limits before calling this */ +int do_prlimit(struct task_struct *tsk, unsigned int resource, + struct rlimit *new_rlim, struct rlimit *old_rlim) +{ + struct rlimit *rlim; + int retval = 0; if (resource >= RLIM_NLIMITS) return -EINVAL; - if (copy_from_user(&new_rlim, rlim, sizeof(*rlim))) - return -EFAULT; - if (new_rlim.rlim_cur > new_rlim.rlim_max) - return -EINVAL; - old_rlim = current->signal->rlim + resource; - if ((new_rlim.rlim_max > old_rlim->rlim_max) && - !capable(CAP_SYS_RESOURCE)) - return -EPERM; - if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open) - return -EPERM; - - retval = security_task_setrlimit(resource, &new_rlim); - if (retval) - return retval; - - if (resource == RLIMIT_CPU && new_rlim.rlim_cur == 0) { - /* - * The caller is asking for an immediate RLIMIT_CPU - * expiry. But we use the zero value to mean "it was - * never set". So let's cheat and make it one second - * instead - */ - new_rlim.rlim_cur = 1; + if (new_rlim) { + if (new_rlim->rlim_cur > new_rlim->rlim_max) + return -EINVAL; + if (resource == RLIMIT_NOFILE && + new_rlim->rlim_max > sysctl_nr_open) + return -EPERM; } - task_lock(current->group_leader); - *old_rlim = new_rlim; - task_unlock(current->group_leader); - - if (resource != RLIMIT_CPU) + /* protect tsk->signal and tsk->sighand from disappearing */ + read_lock(&tasklist_lock); + if (!tsk->sighand) { + retval = -ESRCH; goto out; + } + + rlim = tsk->signal->rlim + resource; + task_lock(tsk->group_leader); + if (new_rlim) { + if (new_rlim->rlim_max > rlim->rlim_max && + !capable(CAP_SYS_RESOURCE)) + retval = -EPERM; + if (!retval) + retval = security_task_setrlimit(tsk->group_leader, + resource, new_rlim); + if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) { + /* + * The caller is asking for an immediate RLIMIT_CPU + * expiry. But we use the zero value to mean "it was + * never set". So let's cheat and make it one second + * instead + */ + new_rlim->rlim_cur = 1; + } + } + if (!retval) { + if (old_rlim) + *old_rlim = *rlim; + if (new_rlim) + *rlim = *new_rlim; + } + task_unlock(tsk->group_leader); /* * RLIMIT_CPU handling. Note that the kernel fails to return an error @@ -1317,14 +1361,84 @@ SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim) * very long-standing error, and fixing it now risks breakage of * applications, so we live with it */ - if (new_rlim.rlim_cur == RLIM_INFINITY) - goto out; - - update_rlimit_cpu(new_rlim.rlim_cur); + if (!retval && new_rlim && resource == RLIMIT_CPU && + new_rlim->rlim_cur != RLIM_INFINITY) + update_rlimit_cpu(tsk, new_rlim->rlim_cur); out: + read_unlock(&tasklist_lock); + return retval; +} + +/* rcu lock must be held */ +static int check_prlimit_permission(struct task_struct *task) +{ + const struct cred *cred = current_cred(), *tcred; + + tcred = __task_cred(task); + if ((cred->uid != tcred->euid || + cred->uid != tcred->suid || + cred->uid != tcred->uid || + cred->gid != tcred->egid || + cred->gid != tcred->sgid || + cred->gid != tcred->gid) && + !capable(CAP_SYS_RESOURCE)) { + return -EPERM; + } + return 0; } +SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource, + const struct rlimit64 __user *, new_rlim, + struct rlimit64 __user *, old_rlim) +{ + struct rlimit64 old64, new64; + struct rlimit old, new; + struct task_struct *tsk; + int ret; + + if (new_rlim) { + if (copy_from_user(&new64, new_rlim, sizeof(new64))) + return -EFAULT; + rlim64_to_rlim(&new64, &new); + } + + rcu_read_lock(); + tsk = pid ? find_task_by_vpid(pid) : current; + if (!tsk) { + rcu_read_unlock(); + return -ESRCH; + } + ret = check_prlimit_permission(tsk); + if (ret) { + rcu_read_unlock(); + return ret; + } + get_task_struct(tsk); + rcu_read_unlock(); + + ret = do_prlimit(tsk, resource, new_rlim ? &new : NULL, + old_rlim ? &old : NULL); + + if (!ret && old_rlim) { + rlim_to_rlim64(&old, &old64); + if (copy_to_user(old_rlim, &old64, sizeof(old64))) + ret = -EFAULT; + } + + put_task_struct(tsk); + return ret; +} + +SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim) +{ + struct rlimit new_rlim; + + if (copy_from_user(&new_rlim, rlim, sizeof(*rlim))) + return -EFAULT; + return do_prlimit(current, resource, &new_rlim, NULL); +} + /* * It would make sense to put struct rusage in the task_struct, * except that would make the task_struct be *really big*. After diff --git a/security/capability.c b/security/capability.c index a0bbf30fb6dc..95a6599a37bb 100644 --- a/security/capability.c +++ b/security/capability.c @@ -411,7 +411,8 @@ static int cap_task_getioprio(struct task_struct *p) return 0; } -static int cap_task_setrlimit(unsigned int resource, struct rlimit *new_rlim) +static int cap_task_setrlimit(struct task_struct *p, unsigned int resource, + struct rlimit *new_rlim) { return 0; } diff --git a/security/security.c b/security/security.c index 7461b1bc296c..c53949f17d9e 100644 --- a/security/security.c +++ b/security/security.c @@ -780,9 +780,10 @@ int security_task_getioprio(struct task_struct *p) return security_ops->task_getioprio(p); } -int security_task_setrlimit(unsigned int resource, struct rlimit *new_rlim) +int security_task_setrlimit(struct task_struct *p, unsigned int resource, + struct rlimit *new_rlim) { - return security_ops->task_setrlimit(resource, new_rlim); + return security_ops->task_setrlimit(p, resource, new_rlim); } int security_task_setscheduler(struct task_struct *p, diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 9b40f4c0ac70..42043f96e54f 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2284,12 +2284,15 @@ static void selinux_bprm_committing_creds(struct linux_binprm *bprm) rc = avc_has_perm(new_tsec->osid, new_tsec->sid, SECCLASS_PROCESS, PROCESS__RLIMITINH, NULL); if (rc) { + /* protect against do_prlimit() */ + task_lock(current); for (i = 0; i < RLIM_NLIMITS; i++) { rlim = current->signal->rlim + i; initrlim = init_task.signal->rlim + i; rlim->rlim_cur = min(rlim->rlim_max, initrlim->rlim_cur); } - update_rlimit_cpu(current->signal->rlim[RLIMIT_CPU].rlim_cur); + task_unlock(current); + update_rlimit_cpu(current, rlimit(RLIMIT_CPU)); } } @@ -3333,16 +3336,17 @@ static int selinux_task_getioprio(struct task_struct *p) return current_has_perm(p, PROCESS__GETSCHED); } -static int selinux_task_setrlimit(unsigned int resource, struct rlimit *new_rlim) +static int selinux_task_setrlimit(struct task_struct *p, unsigned int resource, + struct rlimit *new_rlim) { - struct rlimit *old_rlim = current->signal->rlim + resource; + struct rlimit *old_rlim = p->signal->rlim + resource; /* Control the ability to change the hard limit (whether lowering or raising it), so that the hard limit can later be used as a safe reset point for the soft limit upon context transitions. See selinux_bprm_committing_creds. */ if (old_rlim->rlim_max != new_rlim->rlim_max) - return current_has_perm(current, PROCESS__SETRLIMIT); + return current_has_perm(p, PROCESS__SETRLIMIT); return 0; }