From 80f4b770f4485ae65779c156e5afe3cc3fd854bb Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 2 Sep 2015 16:12:17 +0530 Subject: [PATCH] proc: Usable inode numbers for the namespace file descriptors. Assign a unique proc inode to each namespace, and use that inode number to ensure we only allocate at most one proc inode for every namespace in proc. A single proc inode per namespace allows userspace to test to see if two processes are in the same namespace. This has been a long requested feature and only blocked because a naive implementation would put the id in a global space and would ultimately require having a namespace for the names of namespaces, making migration and certain virtualization tricks impossible. We still don't have per superblock inode numbers for proc, which appears necessary for application unaware checkpoint/restart and migrations (if the application is using namespace file descriptors) but that is now allowd by the design if it becomes important. I have preallocated the ipc and uts initial proc inode numbers so their structures can be statically initialized. Change-Id: I01d42c2f051fd74b1474d9d8378ccc78174cf3cf Signed-off-by: Eric W. Biederman Git-commit: f80cd4676998eb352d1a8b9df0f3663537a9ce93 Git-repo: https://android.googlesource.com/kernel/common/ [schikk@codeaurora.org: Resolved merge conflicts ] CRs-Fixed: 901628 Signed-off-by: Swetha Chikkaboraiah --- fs/mount.h | 1 + fs/namespace.c | 14 ++++++++++++++ fs/proc/namespaces.c | 24 ++++++++++++++---------- include/linux/ipc_namespace.h | 2 ++ include/linux/pid_namespace.h | 1 + include/linux/proc_fs.h | 7 ++++++- include/linux/user_namespace.h | 1 + include/linux/utsname.h | 1 + include/net/net_namespace.h | 2 ++ init/version.c | 2 ++ ipc/msgutil.c | 2 ++ ipc/namespace.c | 16 ++++++++++++++++ kernel/pid.c | 1 + kernel/pid_namespace.c | 5 +++++ kernel/user.c | 2 ++ kernel/user_namespace.c | 8 ++++++++ kernel/utsname.c | 17 ++++++++++++++++- net/core/net_namespace.c | 24 ++++++++++++++++++++++++ 18 files changed, 118 insertions(+), 12 deletions(-) diff --git a/fs/mount.h b/fs/mount.h index 24710ceb763..9a5edb5f4ad 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -4,6 +4,7 @@ struct mnt_namespace { atomic_t count; + unsigned int proc_inum; struct mount * root; struct list_head list; struct user_namespace *user_ns; diff --git a/fs/namespace.c b/fs/namespace.c index 4f569aad60b..34d97cab0ee 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2233,6 +2233,7 @@ dput_out: static void free_mnt_ns(struct mnt_namespace *ns) { + proc_free_inum(ns->proc_inum); put_user_ns(ns->user_ns); kfree(ns); } @@ -2249,10 +2250,16 @@ static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1); static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns) { struct mnt_namespace *new_ns; + int ret; new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); if (!new_ns) return ERR_PTR(-ENOMEM); + ret = proc_alloc_inum(&new_ns->proc_inum); + if (ret) { + kfree(new_ns); + return ERR_PTR(ret); + } new_ns->seq = atomic64_add_return(1, &mnt_ns_seq); atomic_set(&new_ns->count, 1); new_ns->root = NULL; @@ -2752,10 +2759,17 @@ static int mntns_install(struct nsproxy *nsproxy, void *ns) return 0; } +static unsigned int mntns_inum(void *ns) +{ + struct mnt_namespace *mnt_ns = ns; + return mnt_ns->proc_inum; +} + const struct proc_ns_operations mntns_operations = { .name = "mnt", .type = CLONE_NEWNS, .get = mntns_get, .put = mntns_put, .install = mntns_install, + .inum = mntns_inum, }; diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index a5b5f0fddfc..08dfd6ad8f3 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -78,7 +78,7 @@ static struct dentry *proc_ns_get_dentry(struct super_block *sb, return ERR_PTR(-ENOMEM); } - inode = new_inode(sb); + inode = iget_locked(sb, ns_ops->inum(ns)); if (!inode) { dput(dentry); ns_ops->put(ns); @@ -86,13 +86,17 @@ static struct dentry *proc_ns_get_dentry(struct super_block *sb, } ei = PROC_I(inode); - inode->i_ino = get_next_ino(); - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; - inode->i_op = &ns_inode_operations; - inode->i_mode = S_IFREG | S_IRUGO; - inode->i_fop = &ns_file_operations; - ei->ns_ops = ns_ops; - ei->ns = ns; + if (inode->i_state & I_NEW) { + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_op = &ns_inode_operations; + inode->i_mode = S_IFREG | S_IRUGO; + inode->i_fop = &ns_file_operations; + ei->ns_ops = ns_ops; + ei->ns = ns; + unlock_new_inode(inode); + } else { + ns_ops->put(ns); + } d_set_d_op(dentry, &ns_dentry_operations); result = d_instantiate_unique(dentry, inode); @@ -158,12 +162,12 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl if (!ns) goto out_put_task; - snprintf(name, sizeof(name), "%s", ns_ops->name); + snprintf(name, sizeof(name), "%s:[%u]", ns_ops->name, ns_ops->inum(ns)); len = strlen(name); if (len > buflen) len = buflen; - if (copy_to_user(buffer, ns_ops->name, len)) + if (copy_to_user(buffer, name, len)) len = -EFAULT; ns_ops->put(ns); diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index 8a297a5e794..0bd5a4e75ee 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -65,6 +65,8 @@ struct ipc_namespace { /* user_ns which owns the ipc ns */ struct user_namespace *user_ns; + + unsigned int proc_inum; }; extern struct ipc_namespace init_ipc_ns; diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index b067bd8c49d..375527bab2c 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -34,6 +34,7 @@ struct pid_namespace { gid_t pid_gid; int hide_pid; int reboot; /* group exit code if this pidns was rebooted */ + unsigned int proc_inum; }; extern struct pid_namespace init_pid_ns; diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 3c74127132f..6c890170af8 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -28,7 +28,11 @@ struct mm_struct; */ enum { - PROC_ROOT_INO = 1, + PROC_ROOT_INO = 1, + PROC_IPC_INIT_INO = 0xEFFFFFFFU, + PROC_UTS_INIT_INO = 0xEFFFFFFEU, + PROC_USER_INIT_INO = 0xEFFFFFFDU, + PROC_PID_INIT_INO = 0xEFFFFFFCU, }; /* @@ -263,6 +267,7 @@ struct proc_ns_operations { void *(*get)(struct task_struct *task); void (*put)(void *ns); int (*install)(struct nsproxy *nsproxy, void *ns); + unsigned int (*inum)(void *ns); }; extern const struct proc_ns_operations netns_operations; extern const struct proc_ns_operations utsns_operations; diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index faf467944ba..5ecc9888c22 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -14,6 +14,7 @@ struct user_namespace { struct hlist_head uidhash_table[UIDHASH_SZ]; struct user_struct *creator; struct work_struct destroyer; + unsigned int proc_inum; }; extern struct user_namespace init_user_ns; diff --git a/include/linux/utsname.h b/include/linux/utsname.h index c714ed75eae..ae739d72638 100644 --- a/include/linux/utsname.h +++ b/include/linux/utsname.h @@ -52,6 +52,7 @@ struct uts_namespace { struct kref kref; struct new_utsname name; struct user_namespace *user_ns; + unsigned int proc_inum; }; extern struct uts_namespace init_uts_ns; diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index ee547c14981..b1cd8b6916d 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -52,6 +52,8 @@ struct net { struct list_head cleanup_list; /* namespaces on death row */ struct list_head exit_list; /* Use only net_mutex */ + unsigned int proc_inum; + struct proc_dir_entry *proc_net; struct proc_dir_entry *proc_net_stat; diff --git a/init/version.c b/init/version.c index 86fe0ccb997..58170f18912 100644 --- a/init/version.c +++ b/init/version.c @@ -12,6 +12,7 @@ #include #include #include +#include #ifndef CONFIG_KALLSYMS #define version(a) Version_ ## a @@ -34,6 +35,7 @@ struct uts_namespace init_uts_ns = { .domainname = UTS_DOMAINNAME, }, .user_ns = &init_user_ns, + .proc_inum = PROC_UTS_INIT_INO, }; EXPORT_SYMBOL_GPL(init_uts_ns); diff --git a/ipc/msgutil.c b/ipc/msgutil.c index 26143d377c9..6471f1bdae9 100644 --- a/ipc/msgutil.c +++ b/ipc/msgutil.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "util.h" @@ -30,6 +31,7 @@ DEFINE_SPINLOCK(mq_lock); struct ipc_namespace init_ipc_ns = { .count = ATOMIC_INIT(1), .user_ns = &init_user_ns, + .proc_inum = PROC_IPC_INIT_INO, }; atomic_t nr_ipc_ns = ATOMIC_INIT(1); diff --git a/ipc/namespace.c b/ipc/namespace.c index ce0a647869b..cd7f7330d68 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -26,9 +26,16 @@ static struct ipc_namespace *create_ipc_ns(struct task_struct *tsk, if (ns == NULL) return ERR_PTR(-ENOMEM); + err = proc_alloc_inum(&ns->proc_inum); + if (err) { + kfree(ns); + return ERR_PTR(err); + } + atomic_set(&ns->count, 1); err = mq_init_ns(ns); if (err) { + proc_free_inum(ns->proc_inum); kfree(ns); return ERR_PTR(err); } @@ -113,6 +120,7 @@ static void free_ipc_ns(struct ipc_namespace *ns) */ ipcns_notify(IPCNS_REMOVED); put_user_ns(ns->user_ns); + proc_free_inum(ns->proc_inum); kfree(ns); } @@ -170,10 +178,18 @@ static int ipcns_install(struct nsproxy *nsproxy, void *ns) return 0; } +static unsigned int ipcns_inum(void *vp) +{ + struct ipc_namespace *ns = vp; + + return ns->proc_inum; +} + const struct proc_ns_operations ipcns_operations = { .name = "ipc", .type = CLONE_NEWIPC, .get = ipcns_get, .put = ipcns_put, .install = ipcns_install, + .inum = ipcns_inum, }; diff --git a/kernel/pid.c b/kernel/pid.c index 7acf5909415..b1e701b1cda 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -78,6 +78,7 @@ struct pid_namespace init_pid_ns = { .last_pid = 0, .level = 0, .child_reaper = &init_task, + .proc_inum = PROC_PID_INIT_INO, }; EXPORT_SYMBOL_GPL(init_pid_ns); diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 57bc1fd35b3..4f15befb090 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -88,6 +88,10 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p if (ns->pid_cachep == NULL) goto out_free_map; + err = proc_alloc_inum(&ns->proc_inum); + if (err) + goto out_free_map; + kref_init(&ns->kref); ns->level = level; ns->parent = get_pid_ns(parent_pid_ns); @@ -118,6 +122,7 @@ static void destroy_pid_namespace(struct pid_namespace *ns) { int i; + proc_free_inum(ns->proc_inum); for (i = 0; i < PIDMAP_ENTRIES; i++) kfree(ns->pidmap[i].page); kmem_cache_free(pid_ns_cachep, ns); diff --git a/kernel/user.c b/kernel/user.c index 71dd2363ab0..9013a4fe0b1 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -16,6 +16,7 @@ #include #include #include +#include /* * userns count is 1 for root user, 1 for init_uts_ns, @@ -26,6 +27,7 @@ struct user_namespace init_user_ns = { .refcount = ATOMIC_INIT(3), }, .creator = &root_user, + .proc_inum = PROC_USER_INIT_INO, }; EXPORT_SYMBOL_GPL(init_user_ns); diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 3b906e98b1d..c14b7b9fe41 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -27,11 +27,18 @@ int create_user_ns(struct cred *new) struct user_namespace *ns; struct user_struct *root_user; int n; + int ret; ns = kmem_cache_alloc(user_ns_cachep, GFP_KERNEL); if (!ns) return -ENOMEM; + ret = proc_alloc_inum(&ns->proc_inum); + if (ret) { + kmem_cache_free(user_ns_cachep, ns); + return ret; + } + kref_init(&ns->kref); for (n = 0; n < UIDHASH_SZ; ++n) @@ -73,6 +80,7 @@ static void free_user_ns_work(struct work_struct *work) struct user_namespace *ns = container_of(work, struct user_namespace, destroyer); free_uid(ns->creator); + proc_free_inum(ns->proc_inum); kmem_cache_free(user_ns_cachep, ns); } diff --git a/kernel/utsname.c b/kernel/utsname.c index 405caf91aad..ce3d44b4187 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c @@ -36,11 +36,18 @@ static struct uts_namespace *clone_uts_ns(struct task_struct *tsk, struct uts_namespace *old_ns) { struct uts_namespace *ns; + int err; ns = create_uts_ns(); if (!ns) return ERR_PTR(-ENOMEM); + err = proc_alloc_inum(&ns->proc_inum); + if (err) { + kfree(ns); + return ERR_PTR(err); + } + down_read(&uts_sem); memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); ns->user_ns = get_user_ns(task_cred_xxx(tsk, user)->user_ns); @@ -78,6 +85,7 @@ void free_uts_ns(struct kref *kref) ns = container_of(kref, struct uts_namespace, kref); put_user_ns(ns->user_ns); + proc_free_inum(ns->proc_inum); kfree(ns); } @@ -110,11 +118,18 @@ static int utsns_install(struct nsproxy *nsproxy, void *ns) return 0; } +static unsigned int utsns_inum(void *vp) +{ + struct uts_namespace *ns = vp; + + return ns->proc_inum; +} + const struct proc_ns_operations utsns_operations = { .name = "uts", .type = CLONE_NEWUTS, .get = utsns_get, .put = utsns_put, .install = utsns_install, + .inum = utsns_inum, }; - diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 31a5ae51a45..335ab8875bf 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -376,6 +376,21 @@ struct net *get_net_ns_by_pid(pid_t pid) } EXPORT_SYMBOL_GPL(get_net_ns_by_pid); +static __net_init int net_ns_net_init(struct net *net) +{ + return proc_alloc_inum(&net->proc_inum); +} + +static __net_exit void net_ns_net_exit(struct net *net) +{ + proc_free_inum(net->proc_inum); +} + +static struct pernet_operations __net_initdata net_ns_ops = { + .init = net_ns_net_init, + .exit = net_ns_net_exit, +}; + static int __init net_ns_init(void) { struct net_generic *ng; @@ -407,6 +422,8 @@ static int __init net_ns_init(void) mutex_unlock(&net_mutex); + register_pernet_subsys(&net_ns_ops); + return 0; } @@ -630,11 +647,18 @@ static int netns_install(struct nsproxy *nsproxy, void *ns) return 0; } +static unsigned int netns_inum(void *ns) +{ + struct net *net = ns; + return net->proc_inum; +} + const struct proc_ns_operations netns_operations = { .name = "net", .type = CLONE_NEWNET, .get = netns_get, .put = netns_put, .install = netns_install, + .inum = netns_inum, }; #endif