Merge branch 'writable_limits' of git://decibel.fi.muni.cz/~xslaby/linux

* 'writable_limits' of git://decibel.fi.muni.cz/~xslaby/linux:
  unistd: add __NR_prlimit64 syscall numbers
  rlimits: implement prlimit64 syscall
  rlimits: switch more rlimit syscalls to do_prlimit
  rlimits: redo do_setrlimit to more generic do_prlimit
  rlimits: add rlimit64 structure
  rlimits: do security check under task_lock
  rlimits: allow setrlimit to non-current tasks
  rlimits: split sys_setrlimit
  rlimits: selinux, do rlimits changes under task_lock
  rlimits: make sure ->rlim_max never grows in sys_setrlimit
  rlimits: add task_struct to update_rlimit_cpu
  rlimits: security, add task_struct to setrlimit

Fix up various system call number conflicts.  We not only added fanotify
system calls in the meantime, but asm-generic/unistd.h added a wait4
along with a range of reserved per-architecture system calls.
This commit is contained in:
Linus Torvalds 2010-08-10 12:07:51 -07:00
commit b34d8915c4
15 changed files with 207 additions and 75 deletions

View file

@ -844,4 +844,5 @@ ia32_sys_call_table:
.quad compat_sys_recvmmsg
.quad sys_fanotify_init
.quad sys32_fanotify_mark
.quad sys_prlimit64
ia32_syscall_end:

View file

@ -345,10 +345,11 @@
#define __NR_recvmmsg 337
#define __NR_fanotify_init 338
#define __NR_fanotify_mark 339
#define __NR_prlimit64 340
#ifdef __KERNEL__
#define NR_syscalls 340
#define NR_syscalls 341
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR

View file

@ -667,6 +667,8 @@ __SYSCALL(__NR_recvmmsg, sys_recvmmsg)
__SYSCALL(__NR_fanotify_init, sys_fanotify_init)
#define __NR_fanotify_mark 301
__SYSCALL(__NR_fanotify_mark, sys_fanotify_mark)
#define __NR_prlimit64 302
__SYSCALL(__NR_prlimit64, sys_prlimit64)
#ifndef __NO_STUBS
#define __ARCH_WANT_OLD_READDIR

View file

@ -339,3 +339,4 @@ ENTRY(sys_call_table)
.long sys_recvmmsg
.long sys_fanotify_init
.long sys_fanotify_mark
.long sys_prlimit64

View file

@ -640,9 +640,11 @@ __SYSCALL(__NR_recvmmsg, sys_recvmmsg)
#define __NR_wait4 260
__SYSCALL(__NR_wait4, sys_wait4)
#define __NR_prlimit64 261
__SYSCALL(__NR_prlimit64, sys_prlimit64)
#undef __NR_syscalls
#define __NR_syscalls 261
#define __NR_syscalls 262
/*
* All syscalls below here should go away really,

View file

@ -117,6 +117,6 @@ void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
long clock_nanosleep_restart(struct restart_block *restart_block);
void update_rlimit_cpu(unsigned long rlim_new);
void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new);
#endif

View file

@ -43,6 +43,13 @@ struct rlimit {
unsigned long rlim_max;
};
#define RLIM64_INFINITY (~0ULL)
struct rlimit64 {
__u64 rlim_cur;
__u64 rlim_max;
};
#define PRIO_MIN (-20)
#define PRIO_MAX 20
@ -73,6 +80,8 @@ struct rlimit {
struct task_struct;
int getrusage(struct task_struct *p, int who, struct rusage __user *ru);
int do_prlimit(struct task_struct *tsk, unsigned int resource,
struct rlimit *new_rlim, struct rlimit *old_rlim);
#endif /* __KERNEL__ */

View file

@ -1499,7 +1499,8 @@ struct security_operations {
int (*task_setnice) (struct task_struct *p, int nice);
int (*task_setioprio) (struct task_struct *p, int ioprio);
int (*task_getioprio) (struct task_struct *p);
int (*task_setrlimit) (unsigned int resource, struct rlimit *new_rlim);
int (*task_setrlimit) (struct task_struct *p, unsigned int resource,
struct rlimit *new_rlim);
int (*task_setscheduler) (struct task_struct *p, int policy,
struct sched_param *lp);
int (*task_getscheduler) (struct task_struct *p);
@ -1749,7 +1750,8 @@ void security_task_getsecid(struct task_struct *p, u32 *secid);
int security_task_setnice(struct task_struct *p, int nice);
int security_task_setioprio(struct task_struct *p, int ioprio);
int security_task_getioprio(struct task_struct *p);
int security_task_setrlimit(unsigned int resource, struct rlimit *new_rlim);
int security_task_setrlimit(struct task_struct *p, unsigned int resource,
struct rlimit *new_rlim);
int security_task_setscheduler(struct task_struct *p,
int policy, struct sched_param *lp);
int security_task_getscheduler(struct task_struct *p);
@ -2311,7 +2313,8 @@ static inline int security_task_getioprio(struct task_struct *p)
return 0;
}
static inline int security_task_setrlimit(unsigned int resource,
static inline int security_task_setrlimit(struct task_struct *p,
unsigned int resource,
struct rlimit *new_rlim)
{
return 0;

View file

@ -35,6 +35,7 @@ struct oldold_utsname;
struct old_utsname;
struct pollfd;
struct rlimit;
struct rlimit64;
struct rusage;
struct sched_param;
struct sel_arg_struct;
@ -644,6 +645,9 @@ asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *r
#endif
asmlinkage long sys_setrlimit(unsigned int resource,
struct rlimit __user *rlim);
asmlinkage long sys_prlimit64(pid_t pid, unsigned int resource,
const struct rlimit64 __user *new_rlim,
struct rlimit64 __user *old_rlim);
asmlinkage long sys_getrusage(int who, struct rusage __user *ru);
asmlinkage long sys_umask(int mask);

View file

@ -279,11 +279,6 @@ asmlinkage long compat_sys_setrlimit(unsigned int resource,
struct compat_rlimit __user *rlim)
{
struct rlimit r;
int ret;
mm_segment_t old_fs = get_fs ();
if (resource >= RLIM_NLIMITS)
return -EINVAL;
if (!access_ok(VERIFY_READ, rlim, sizeof(*rlim)) ||
__get_user(r.rlim_cur, &rlim->rlim_cur) ||
@ -294,10 +289,7 @@ asmlinkage long compat_sys_setrlimit(unsigned int resource,
r.rlim_cur = RLIM_INFINITY;
if (r.rlim_max == COMPAT_RLIM_INFINITY)
r.rlim_max = RLIM_INFINITY;
set_fs(KERNEL_DS);
ret = sys_setrlimit(resource, (struct rlimit __user *) &r);
set_fs(old_fs);
return ret;
return do_prlimit(current, resource, &r, NULL);
}
#ifdef COMPAT_RLIM_OLD_INFINITY
@ -329,16 +321,13 @@ asmlinkage long compat_sys_old_getrlimit(unsigned int resource,
#endif
asmlinkage long compat_sys_getrlimit (unsigned int resource,
asmlinkage long compat_sys_getrlimit(unsigned int resource,
struct compat_rlimit __user *rlim)
{
struct rlimit r;
int ret;
mm_segment_t old_fs = get_fs();
set_fs(KERNEL_DS);
ret = sys_getrlimit(resource, (struct rlimit __user *) &r);
set_fs(old_fs);
ret = do_prlimit(current, resource, NULL, &r);
if (!ret) {
if (r.rlim_cur > COMPAT_RLIM_INFINITY)
r.rlim_cur = COMPAT_RLIM_INFINITY;

View file

@ -16,13 +16,13 @@
* siglock protection since other code may update expiration cache as
* well.
*/
void update_rlimit_cpu(unsigned long rlim_new)
void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new)
{
cputime_t cputime = secs_to_cputime(rlim_new);
spin_lock_irq(&current->sighand->siglock);
set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
spin_unlock_irq(&current->sighand->siglock);
spin_lock_irq(&task->sighand->siglock);
set_process_cpu_timer(task, CPUCLOCK_PROF, &cputime, NULL);
spin_unlock_irq(&task->sighand->siglock);
}
static int check_clock(const clockid_t which_clock)

View file

@ -1236,15 +1236,14 @@ SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len)
SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim)
{
if (resource >= RLIM_NLIMITS)
return -EINVAL;
else {
struct rlimit value;
task_lock(current->group_leader);
value = current->signal->rlim[resource];
task_unlock(current->group_leader);
return copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0;
}
struct rlimit value;
int ret;
ret = do_prlimit(current, resource, NULL, &value);
if (!ret)
ret = copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0;
return ret;
}
#ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT
@ -1272,44 +1271,89 @@ SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
#endif
SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
static inline bool rlim64_is_infinity(__u64 rlim64)
{
struct rlimit new_rlim, *old_rlim;
int retval;
#if BITS_PER_LONG < 64
return rlim64 >= ULONG_MAX;
#else
return rlim64 == RLIM64_INFINITY;
#endif
}
static void rlim_to_rlim64(const struct rlimit *rlim, struct rlimit64 *rlim64)
{
if (rlim->rlim_cur == RLIM_INFINITY)
rlim64->rlim_cur = RLIM64_INFINITY;
else
rlim64->rlim_cur = rlim->rlim_cur;
if (rlim->rlim_max == RLIM_INFINITY)
rlim64->rlim_max = RLIM64_INFINITY;
else
rlim64->rlim_max = rlim->rlim_max;
}
static void rlim64_to_rlim(const struct rlimit64 *rlim64, struct rlimit *rlim)
{
if (rlim64_is_infinity(rlim64->rlim_cur))
rlim->rlim_cur = RLIM_INFINITY;
else
rlim->rlim_cur = (unsigned long)rlim64->rlim_cur;
if (rlim64_is_infinity(rlim64->rlim_max))
rlim->rlim_max = RLIM_INFINITY;
else
rlim->rlim_max = (unsigned long)rlim64->rlim_max;
}
/* make sure you are allowed to change @tsk limits before calling this */
int do_prlimit(struct task_struct *tsk, unsigned int resource,
struct rlimit *new_rlim, struct rlimit *old_rlim)
{
struct rlimit *rlim;
int retval = 0;
if (resource >= RLIM_NLIMITS)
return -EINVAL;
if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
return -EFAULT;
if (new_rlim.rlim_cur > new_rlim.rlim_max)
return -EINVAL;
old_rlim = current->signal->rlim + resource;
if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
!capable(CAP_SYS_RESOURCE))
return -EPERM;
if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open)
return -EPERM;
retval = security_task_setrlimit(resource, &new_rlim);
if (retval)
return retval;
if (resource == RLIMIT_CPU && new_rlim.rlim_cur == 0) {
/*
* The caller is asking for an immediate RLIMIT_CPU
* expiry. But we use the zero value to mean "it was
* never set". So let's cheat and make it one second
* instead
*/
new_rlim.rlim_cur = 1;
if (new_rlim) {
if (new_rlim->rlim_cur > new_rlim->rlim_max)
return -EINVAL;
if (resource == RLIMIT_NOFILE &&
new_rlim->rlim_max > sysctl_nr_open)
return -EPERM;
}
task_lock(current->group_leader);
*old_rlim = new_rlim;
task_unlock(current->group_leader);
if (resource != RLIMIT_CPU)
/* protect tsk->signal and tsk->sighand from disappearing */
read_lock(&tasklist_lock);
if (!tsk->sighand) {
retval = -ESRCH;
goto out;
}
rlim = tsk->signal->rlim + resource;
task_lock(tsk->group_leader);
if (new_rlim) {
if (new_rlim->rlim_max > rlim->rlim_max &&
!capable(CAP_SYS_RESOURCE))
retval = -EPERM;
if (!retval)
retval = security_task_setrlimit(tsk->group_leader,
resource, new_rlim);
if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) {
/*
* The caller is asking for an immediate RLIMIT_CPU
* expiry. But we use the zero value to mean "it was
* never set". So let's cheat and make it one second
* instead
*/
new_rlim->rlim_cur = 1;
}
}
if (!retval) {
if (old_rlim)
*old_rlim = *rlim;
if (new_rlim)
*rlim = *new_rlim;
}
task_unlock(tsk->group_leader);
/*
* RLIMIT_CPU handling. Note that the kernel fails to return an error
@ -1317,14 +1361,84 @@ SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
* very long-standing error, and fixing it now risks breakage of
* applications, so we live with it
*/
if (new_rlim.rlim_cur == RLIM_INFINITY)
goto out;
update_rlimit_cpu(new_rlim.rlim_cur);
if (!retval && new_rlim && resource == RLIMIT_CPU &&
new_rlim->rlim_cur != RLIM_INFINITY)
update_rlimit_cpu(tsk, new_rlim->rlim_cur);
out:
read_unlock(&tasklist_lock);
return retval;
}
/* rcu lock must be held */
static int check_prlimit_permission(struct task_struct *task)
{
const struct cred *cred = current_cred(), *tcred;
tcred = __task_cred(task);
if ((cred->uid != tcred->euid ||
cred->uid != tcred->suid ||
cred->uid != tcred->uid ||
cred->gid != tcred->egid ||
cred->gid != tcred->sgid ||
cred->gid != tcred->gid) &&
!capable(CAP_SYS_RESOURCE)) {
return -EPERM;
}
return 0;
}
SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource,
const struct rlimit64 __user *, new_rlim,
struct rlimit64 __user *, old_rlim)
{
struct rlimit64 old64, new64;
struct rlimit old, new;
struct task_struct *tsk;
int ret;
if (new_rlim) {
if (copy_from_user(&new64, new_rlim, sizeof(new64)))
return -EFAULT;
rlim64_to_rlim(&new64, &new);
}
rcu_read_lock();
tsk = pid ? find_task_by_vpid(pid) : current;
if (!tsk) {
rcu_read_unlock();
return -ESRCH;
}
ret = check_prlimit_permission(tsk);
if (ret) {
rcu_read_unlock();
return ret;
}
get_task_struct(tsk);
rcu_read_unlock();
ret = do_prlimit(tsk, resource, new_rlim ? &new : NULL,
old_rlim ? &old : NULL);
if (!ret && old_rlim) {
rlim_to_rlim64(&old, &old64);
if (copy_to_user(old_rlim, &old64, sizeof(old64)))
ret = -EFAULT;
}
put_task_struct(tsk);
return ret;
}
SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
{
struct rlimit new_rlim;
if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
return -EFAULT;
return do_prlimit(current, resource, &new_rlim, NULL);
}
/*
* It would make sense to put struct rusage in the task_struct,
* except that would make the task_struct be *really big*. After

View file

@ -411,7 +411,8 @@ static int cap_task_getioprio(struct task_struct *p)
return 0;
}
static int cap_task_setrlimit(unsigned int resource, struct rlimit *new_rlim)
static int cap_task_setrlimit(struct task_struct *p, unsigned int resource,
struct rlimit *new_rlim)
{
return 0;
}

View file

@ -780,9 +780,10 @@ int security_task_getioprio(struct task_struct *p)
return security_ops->task_getioprio(p);
}
int security_task_setrlimit(unsigned int resource, struct rlimit *new_rlim)
int security_task_setrlimit(struct task_struct *p, unsigned int resource,
struct rlimit *new_rlim)
{
return security_ops->task_setrlimit(resource, new_rlim);
return security_ops->task_setrlimit(p, resource, new_rlim);
}
int security_task_setscheduler(struct task_struct *p,

View file

@ -2284,12 +2284,15 @@ static void selinux_bprm_committing_creds(struct linux_binprm *bprm)
rc = avc_has_perm(new_tsec->osid, new_tsec->sid, SECCLASS_PROCESS,
PROCESS__RLIMITINH, NULL);
if (rc) {
/* protect against do_prlimit() */
task_lock(current);
for (i = 0; i < RLIM_NLIMITS; i++) {
rlim = current->signal->rlim + i;
initrlim = init_task.signal->rlim + i;
rlim->rlim_cur = min(rlim->rlim_max, initrlim->rlim_cur);
}
update_rlimit_cpu(current->signal->rlim[RLIMIT_CPU].rlim_cur);
task_unlock(current);
update_rlimit_cpu(current, rlimit(RLIMIT_CPU));
}
}
@ -3333,16 +3336,17 @@ static int selinux_task_getioprio(struct task_struct *p)
return current_has_perm(p, PROCESS__GETSCHED);
}
static int selinux_task_setrlimit(unsigned int resource, struct rlimit *new_rlim)
static int selinux_task_setrlimit(struct task_struct *p, unsigned int resource,
struct rlimit *new_rlim)
{
struct rlimit *old_rlim = current->signal->rlim + resource;
struct rlimit *old_rlim = p->signal->rlim + resource;
/* Control the ability to change the hard limit (whether
lowering or raising it), so that the hard limit can
later be used as a safe reset point for the soft limit
upon context transitions. See selinux_bprm_committing_creds. */
if (old_rlim->rlim_max != new_rlim->rlim_max)
return current_has_perm(current, PROCESS__SETRLIMIT);
return current_has_perm(p, PROCESS__SETRLIMIT);
return 0;
}