android_kernel_samsung_msm8976/net/netfilter/x_tables.c

1683 lines
41 KiB
C
Raw Permalink Normal View History

/*
* x_tables core - Backend for {ip,ip6,arp}_tables
*
* Copyright (C) 2006-2006 Harald Welte <laforge@netfilter.org>
* Copyright (C) 2006-2012 Patrick McHardy <kaber@trash.net>
*
* Based on existing ip_tables code which is
* Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
* Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/mutex.h>
#include <linux/mm.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 08:04:11 +00:00
#include <linux/slab.h>
#include <linux/audit.h>
#include <net/net_namespace.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_arp.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <linux/netfilter_arp/arp_tables.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module");
#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
struct compat_delta {
unsigned int offset; /* offset in kernel */
int delta; /* delta in 32bit user land */
};
struct xt_af {
struct mutex mutex;
struct list_head match;
struct list_head target;
#ifdef CONFIG_COMPAT
struct mutex compat_mutex;
struct compat_delta *compat_tab;
unsigned int number; /* number of slots in compat_tab[] */
unsigned int cur; /* number of used slots in compat_tab[] */
#endif
};
static struct xt_af *xt;
static const char *const xt_prefix[NFPROTO_NUMPROTO] = {
[NFPROTO_UNSPEC] = "x",
[NFPROTO_IPV4] = "ip",
[NFPROTO_ARP] = "arp",
[NFPROTO_BRIDGE] = "eb",
[NFPROTO_IPV6] = "ip6",
};
/* Allow this many total (re)entries. */
static const unsigned int xt_jumpstack_multiplier = 2;
/* Registration hooks for targets. */
int
xt_register_target(struct xt_target *target)
{
u_int8_t af = target->family;
int ret;
ret = mutex_lock_interruptible(&xt[af].mutex);
if (ret != 0)
return ret;
list_add(&target->list, &xt[af].target);
mutex_unlock(&xt[af].mutex);
return ret;
}
EXPORT_SYMBOL(xt_register_target);
void
xt_unregister_target(struct xt_target *target)
{
u_int8_t af = target->family;
mutex_lock(&xt[af].mutex);
list_del(&target->list);
mutex_unlock(&xt[af].mutex);
}
EXPORT_SYMBOL(xt_unregister_target);
int
xt_register_targets(struct xt_target *target, unsigned int n)
{
unsigned int i;
int err = 0;
for (i = 0; i < n; i++) {
err = xt_register_target(&target[i]);
if (err)
goto err;
}
return err;
err:
if (i > 0)
xt_unregister_targets(target, i);
return err;
}
EXPORT_SYMBOL(xt_register_targets);
void
xt_unregister_targets(struct xt_target *target, unsigned int n)
{
while (n-- > 0)
xt_unregister_target(&target[n]);
}
EXPORT_SYMBOL(xt_unregister_targets);
int
xt_register_match(struct xt_match *match)
{
u_int8_t af = match->family;
int ret;
ret = mutex_lock_interruptible(&xt[af].mutex);
if (ret != 0)
return ret;
list_add(&match->list, &xt[af].match);
mutex_unlock(&xt[af].mutex);
return ret;
}
EXPORT_SYMBOL(xt_register_match);
void
xt_unregister_match(struct xt_match *match)
{
u_int8_t af = match->family;
mutex_lock(&xt[af].mutex);
list_del(&match->list);
mutex_unlock(&xt[af].mutex);
}
EXPORT_SYMBOL(xt_unregister_match);
int
xt_register_matches(struct xt_match *match, unsigned int n)
{
unsigned int i;
int err = 0;
for (i = 0; i < n; i++) {
err = xt_register_match(&match[i]);
if (err)
goto err;
}
return err;
err:
if (i > 0)
xt_unregister_matches(match, i);
return err;
}
EXPORT_SYMBOL(xt_register_matches);
void
xt_unregister_matches(struct xt_match *match, unsigned int n)
{
while (n-- > 0)
xt_unregister_match(&match[n]);
}
EXPORT_SYMBOL(xt_unregister_matches);
/*
* These are weird, but module loading must not be done with mutex
* held (since they will register), and we have to have a single
* function to use.
*/
/* Find match, grabs ref. Returns ERR_PTR() on error. */
struct xt_match *xt_find_match(u8 af, const char *name, u8 revision)
{
struct xt_match *m;
int err = -ENOENT;
if (mutex_lock_interruptible(&xt[af].mutex) != 0)
return ERR_PTR(-EINTR);
list_for_each_entry(m, &xt[af].match, list) {
if (strcmp(m->name, name) == 0) {
if (m->revision == revision) {
if (try_module_get(m->me)) {
mutex_unlock(&xt[af].mutex);
return m;
}
} else
err = -EPROTOTYPE; /* Found something. */
}
}
mutex_unlock(&xt[af].mutex);
if (af != NFPROTO_UNSPEC)
/* Try searching again in the family-independent list */
return xt_find_match(NFPROTO_UNSPEC, name, revision);
return ERR_PTR(err);
}
EXPORT_SYMBOL(xt_find_match);
struct xt_match *
xt_request_find_match(uint8_t nfproto, const char *name, uint8_t revision)
{
struct xt_match *match;
match = xt_find_match(nfproto, name, revision);
if (IS_ERR(match)) {
request_module("%st_%s", xt_prefix[nfproto], name);
match = xt_find_match(nfproto, name, revision);
}
return match;
}
EXPORT_SYMBOL_GPL(xt_request_find_match);
/* Find target, grabs ref. Returns ERR_PTR() on error. */
struct xt_target *xt_find_target(u8 af, const char *name, u8 revision)
{
struct xt_target *t;
int err = -ENOENT;
if (mutex_lock_interruptible(&xt[af].mutex) != 0)
return ERR_PTR(-EINTR);
list_for_each_entry(t, &xt[af].target, list) {
if (strcmp(t->name, name) == 0) {
if (t->revision == revision) {
if (try_module_get(t->me)) {
mutex_unlock(&xt[af].mutex);
return t;
}
} else
err = -EPROTOTYPE; /* Found something. */
}
}
mutex_unlock(&xt[af].mutex);
if (af != NFPROTO_UNSPEC)
/* Try searching again in the family-independent list */
return xt_find_target(NFPROTO_UNSPEC, name, revision);
return ERR_PTR(err);
}
EXPORT_SYMBOL(xt_find_target);
struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision)
{
struct xt_target *target;
target = xt_find_target(af, name, revision);
if (IS_ERR(target)) {
request_module("%st_%s", xt_prefix[af], name);
target = xt_find_target(af, name, revision);
}
return target;
}
EXPORT_SYMBOL_GPL(xt_request_find_target);
static int match_revfn(u8 af, const char *name, u8 revision, int *bestp)
{
const struct xt_match *m;
int have_rev = 0;
list_for_each_entry(m, &xt[af].match, list) {
if (strcmp(m->name, name) == 0) {
if (m->revision > *bestp)
*bestp = m->revision;
if (m->revision == revision)
have_rev = 1;
}
}
if (af != NFPROTO_UNSPEC && !have_rev)
return match_revfn(NFPROTO_UNSPEC, name, revision, bestp);
return have_rev;
}
static int target_revfn(u8 af, const char *name, u8 revision, int *bestp)
{
const struct xt_target *t;
int have_rev = 0;
list_for_each_entry(t, &xt[af].target, list) {
if (strcmp(t->name, name) == 0) {
if (t->revision > *bestp)
*bestp = t->revision;
if (t->revision == revision)
have_rev = 1;
}
}
if (af != NFPROTO_UNSPEC && !have_rev)
return target_revfn(NFPROTO_UNSPEC, name, revision, bestp);
return have_rev;
}
/* Returns true or false (if no such extension at all) */
int xt_find_revision(u8 af, const char *name, u8 revision, int target,
int *err)
{
int have_rev, best = -1;
if (mutex_lock_interruptible(&xt[af].mutex) != 0) {
*err = -EINTR;
return 1;
}
if (target == 1)
have_rev = target_revfn(af, name, revision, &best);
else
have_rev = match_revfn(af, name, revision, &best);
mutex_unlock(&xt[af].mutex);
/* Nothing at all? Return 0 to try loading module. */
if (best == -1) {
*err = -ENOENT;
return 0;
}
*err = best;
if (!have_rev)
*err = -EPROTONOSUPPORT;
return 1;
}
EXPORT_SYMBOL_GPL(xt_find_revision);
static char *
textify_hooks(char *buf, size_t size, unsigned int mask, uint8_t nfproto)
{
static const char *const inetbr_names[] = {
"PREROUTING", "INPUT", "FORWARD",
"OUTPUT", "POSTROUTING", "BROUTING",
};
static const char *const arp_names[] = {
"INPUT", "FORWARD", "OUTPUT",
};
const char *const *names;
unsigned int i, max;
char *p = buf;
bool np = false;
int res;
names = (nfproto == NFPROTO_ARP) ? arp_names : inetbr_names;
max = (nfproto == NFPROTO_ARP) ? ARRAY_SIZE(arp_names) :
ARRAY_SIZE(inetbr_names);
*p = '\0';
for (i = 0; i < max; ++i) {
if (!(mask & (1 << i)))
continue;
res = snprintf(p, size, "%s%s", np ? "/" : "", names[i]);
if (res > 0) {
size -= res;
p += res;
}
np = true;
}
return buf;
}
/**
* xt_check_proc_name - check that name is suitable for /proc file creation
*
* @name: file name candidate
* @size: length of buffer
*
* some x_tables modules wish to create a file in /proc.
* This function makes sure that the name is suitable for this
* purpose, it checks that name is NUL terminated and isn't a 'special'
* name, like "..".
*
* returns negative number on error or 0 if name is useable.
*/
int xt_check_proc_name(const char *name, unsigned int size)
{
if (name[0] == '\0')
return -EINVAL;
if (strnlen(name, size) == size)
return -ENAMETOOLONG;
if (strcmp(name, ".") == 0 ||
strcmp(name, "..") == 0 ||
strchr(name, '/'))
return -EINVAL;
return 0;
}
EXPORT_SYMBOL(xt_check_proc_name);
int xt_check_match(struct xt_mtchk_param *par,
unsigned int size, u_int8_t proto, bool inv_proto)
{
int ret;
if (XT_ALIGN(par->match->matchsize) != size &&
par->match->matchsize != -1) {
/*
* ebt_among is exempt from centralized matchsize checking
* because it uses a dynamic-size data set.
*/
pr_err("%s_tables: %s.%u match: invalid size "
"%u (kernel) != (user) %u\n",
xt_prefix[par->family], par->match->name,
par->match->revision,
XT_ALIGN(par->match->matchsize), size);
return -EINVAL;
}
if (par->match->table != NULL &&
strcmp(par->match->table, par->table) != 0) {
pr_err("%s_tables: %s match: only valid in %s table, not %s\n",
xt_prefix[par->family], par->match->name,
par->match->table, par->table);
return -EINVAL;
}
if (par->match->hooks && (par->hook_mask & ~par->match->hooks) != 0) {
char used[64], allow[64];
pr_err("%s_tables: %s match: used from hooks %s, but only "
"valid from %s\n",
xt_prefix[par->family], par->match->name,
textify_hooks(used, sizeof(used), par->hook_mask,
par->family),
textify_hooks(allow, sizeof(allow), par->match->hooks,
par->family));
return -EINVAL;
}
if (par->match->proto && (par->match->proto != proto || inv_proto)) {
pr_err("%s_tables: %s match: only valid for protocol %u\n",
xt_prefix[par->family], par->match->name,
par->match->proto);
return -EINVAL;
}
if (par->match->checkentry != NULL) {
ret = par->match->checkentry(par);
if (ret < 0)
return ret;
else if (ret > 0)
/* Flag up potential errors. */
return -EIO;
}
return 0;
}
EXPORT_SYMBOL_GPL(xt_check_match);
/** xt_check_entry_match - check that matches end before start of target
*
* @match: beginning of xt_entry_match
* @target: beginning of this rules target (alleged end of matches)
* @alignment: alignment requirement of match structures
*
* Validates that all matches add up to the beginning of the target,
* and that each match covers at least the base structure size.
*
* Return: 0 on success, negative errno on failure.
*/
static int xt_check_entry_match(const char *match, const char *target,
const size_t alignment)
{
const struct xt_entry_match *pos;
int length = target - match;
if (length == 0) /* no matches */
return 0;
pos = (struct xt_entry_match *)match;
do {
if ((unsigned long)pos % alignment)
return -EINVAL;
if (length < (int)sizeof(struct xt_entry_match))
return -EINVAL;
if (pos->u.match_size < sizeof(struct xt_entry_match))
return -EINVAL;
if (pos->u.match_size > length)
return -EINVAL;
length -= pos->u.match_size;
pos = ((void *)((char *)(pos) + (pos)->u.match_size));
} while (length > 0);
return 0;
}
#ifdef CONFIG_COMPAT
int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta)
{
struct xt_af *xp = &xt[af];
if (!xp->compat_tab) {
if (!xp->number)
return -EINVAL;
xp->compat_tab = vmalloc(sizeof(struct compat_delta) * xp->number);
if (!xp->compat_tab)
return -ENOMEM;
xp->cur = 0;
}
if (xp->cur >= xp->number)
return -EINVAL;
if (xp->cur)
delta += xp->compat_tab[xp->cur - 1].delta;
xp->compat_tab[xp->cur].offset = offset;
xp->compat_tab[xp->cur].delta = delta;
xp->cur++;
return 0;
}
EXPORT_SYMBOL_GPL(xt_compat_add_offset);
void xt_compat_flush_offsets(u_int8_t af)
{
if (xt[af].compat_tab) {
vfree(xt[af].compat_tab);
xt[af].compat_tab = NULL;
xt[af].number = 0;
xt[af].cur = 0;
}
}
EXPORT_SYMBOL_GPL(xt_compat_flush_offsets);
int xt_compat_calc_jump(u_int8_t af, unsigned int offset)
{
struct compat_delta *tmp = xt[af].compat_tab;
int mid, left = 0, right = xt[af].cur - 1;
while (left <= right) {
mid = (left + right) >> 1;
if (offset > tmp[mid].offset)
left = mid + 1;
else if (offset < tmp[mid].offset)
right = mid - 1;
else
return mid ? tmp[mid - 1].delta : 0;
}
return left ? tmp[left - 1].delta : 0;
}
EXPORT_SYMBOL_GPL(xt_compat_calc_jump);
void xt_compat_init_offsets(u_int8_t af, unsigned int number)
{
xt[af].number = number;
xt[af].cur = 0;
}
EXPORT_SYMBOL(xt_compat_init_offsets);
int xt_compat_match_offset(const struct xt_match *match)
{
u_int16_t csize = match->compatsize ? : match->matchsize;
return XT_ALIGN(match->matchsize) - COMPAT_XT_ALIGN(csize);
}
EXPORT_SYMBOL_GPL(xt_compat_match_offset);
void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
unsigned int *size)
{
const struct xt_match *match = m->u.kernel.match;
struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m;
int pad, off = xt_compat_match_offset(match);
u_int16_t msize = cm->u.user.match_size;
netfilter: x_tables: do compat validation via translate_table commit 09d9686047dbbe1cf4faa558d3ecc4aae2046054 upstream. This looks like refactoring, but its also a bug fix. Problem is that the compat path (32bit iptables, 64bit kernel) lacks a few sanity tests that are done in the normal path. For example, we do not check for underflows and the base chain policies. While its possible to also add such checks to the compat path, its more copy&pastry, for instance we cannot reuse check_underflow() helper as e->target_offset differs in the compat case. Other problem is that it makes auditing for validation errors harder; two places need to be checked and kept in sync. At a high level 32 bit compat works like this: 1- initial pass over blob: validate match/entry offsets, bounds checking lookup all matches and targets do bookkeeping wrt. size delta of 32/64bit structures assign match/target.u.kernel pointer (points at kernel implementation, needed to access ->compatsize etc.) 2- allocate memory according to the total bookkeeping size to contain the translated ruleset 3- second pass over original blob: for each entry, copy the 32bit representation to the newly allocated memory. This also does any special match translations (e.g. adjust 32bit to 64bit longs, etc). 4- check if ruleset is free of loops (chase all jumps) 5-first pass over translated blob: call the checkentry function of all matches and targets. The alternative implemented by this patch is to drop steps 3&4 from the compat process, the translation is changed into an intermediate step rather than a full 1:1 translate_table replacement. In the 2nd pass (step #3), change the 64bit ruleset back to a kernel representation, i.e. put() the kernel pointer and restore ->u.user.name . This gets us a 64bit ruleset that is in the format generated by a 64bit iptables userspace -- we can then use translate_table() to get the 'native' sanity checks. This has two drawbacks: 1. we re-validate all the match and target entry structure sizes even though compat translation is supposed to never generate bogus offsets. 2. we put and then re-lookup each match and target. THe upside is that we get all sanity tests and ruleset validations provided by the normal path and can remove some duplicated compat code. iptables-restore time of autogenerated ruleset with 300k chains of form -A CHAIN0001 -m limit --limit 1/s -j CHAIN0002 -A CHAIN0002 -m limit --limit 1/s -j CHAIN0003 shows no noticeable differences in restore times: old: 0m30.796s new: 0m31.521s 64bit: 0m25.674s Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> Signed-off-by: Willy Tarreau <w@1wt.eu>
2016-04-01 12:17:34 +00:00
char name[sizeof(m->u.user.name)];
m = *dstptr;
memcpy(m, cm, sizeof(*cm));
if (match->compat_from_user)
match->compat_from_user(m->data, cm->data);
else
memcpy(m->data, cm->data, msize - sizeof(*cm));
pad = XT_ALIGN(match->matchsize) - match->matchsize;
if (pad > 0)
memset(m->data + match->matchsize, 0, pad);
msize += off;
m->u.user.match_size = msize;
netfilter: x_tables: do compat validation via translate_table commit 09d9686047dbbe1cf4faa558d3ecc4aae2046054 upstream. This looks like refactoring, but its also a bug fix. Problem is that the compat path (32bit iptables, 64bit kernel) lacks a few sanity tests that are done in the normal path. For example, we do not check for underflows and the base chain policies. While its possible to also add such checks to the compat path, its more copy&pastry, for instance we cannot reuse check_underflow() helper as e->target_offset differs in the compat case. Other problem is that it makes auditing for validation errors harder; two places need to be checked and kept in sync. At a high level 32 bit compat works like this: 1- initial pass over blob: validate match/entry offsets, bounds checking lookup all matches and targets do bookkeeping wrt. size delta of 32/64bit structures assign match/target.u.kernel pointer (points at kernel implementation, needed to access ->compatsize etc.) 2- allocate memory according to the total bookkeeping size to contain the translated ruleset 3- second pass over original blob: for each entry, copy the 32bit representation to the newly allocated memory. This also does any special match translations (e.g. adjust 32bit to 64bit longs, etc). 4- check if ruleset is free of loops (chase all jumps) 5-first pass over translated blob: call the checkentry function of all matches and targets. The alternative implemented by this patch is to drop steps 3&4 from the compat process, the translation is changed into an intermediate step rather than a full 1:1 translate_table replacement. In the 2nd pass (step #3), change the 64bit ruleset back to a kernel representation, i.e. put() the kernel pointer and restore ->u.user.name . This gets us a 64bit ruleset that is in the format generated by a 64bit iptables userspace -- we can then use translate_table() to get the 'native' sanity checks. This has two drawbacks: 1. we re-validate all the match and target entry structure sizes even though compat translation is supposed to never generate bogus offsets. 2. we put and then re-lookup each match and target. THe upside is that we get all sanity tests and ruleset validations provided by the normal path and can remove some duplicated compat code. iptables-restore time of autogenerated ruleset with 300k chains of form -A CHAIN0001 -m limit --limit 1/s -j CHAIN0002 -A CHAIN0002 -m limit --limit 1/s -j CHAIN0003 shows no noticeable differences in restore times: old: 0m30.796s new: 0m31.521s 64bit: 0m25.674s Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> Signed-off-by: Willy Tarreau <w@1wt.eu>
2016-04-01 12:17:34 +00:00
strlcpy(name, match->name, sizeof(name));
module_put(match->me);
strncpy(m->u.user.name, name, sizeof(m->u.user.name));
*size += off;
*dstptr += msize;
}
EXPORT_SYMBOL_GPL(xt_compat_match_from_user);
int xt_compat_match_to_user(const struct xt_entry_match *m,
void __user **dstptr, unsigned int *size)
{
const struct xt_match *match = m->u.kernel.match;
struct compat_xt_entry_match __user *cm = *dstptr;
int off = xt_compat_match_offset(match);
u_int16_t msize = m->u.user.match_size - off;
if (copy_to_user(cm, m, sizeof(*cm)) ||
put_user(msize, &cm->u.user.match_size) ||
copy_to_user(cm->u.user.name, m->u.kernel.match->name,
strlen(m->u.kernel.match->name) + 1))
return -EFAULT;
if (match->compat_to_user) {
if (match->compat_to_user((void __user *)cm->data, m->data))
return -EFAULT;
} else {
if (copy_to_user(cm->data, m->data, msize - sizeof(*cm)))
return -EFAULT;
}
*size -= off;
*dstptr += msize;
return 0;
}
EXPORT_SYMBOL_GPL(xt_compat_match_to_user);
/* non-compat version may have padding after verdict */
struct compat_xt_standard_target {
struct compat_xt_entry_target t;
compat_uint_t verdict;
};
int xt_compat_check_entry_offsets(const void *base, const char *elems,
unsigned int target_offset,
unsigned int next_offset)
{
long size_of_base_struct = elems - (const char *)base;
const struct compat_xt_entry_target *t;
const char *e = base;
if (target_offset < size_of_base_struct)
return -EINVAL;
if (target_offset + sizeof(*t) > next_offset)
return -EINVAL;
t = (void *)(e + target_offset);
if (t->u.target_size < sizeof(*t))
return -EINVAL;
if (target_offset + t->u.target_size > next_offset)
return -EINVAL;
if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 &&
COMPAT_XT_ALIGN(target_offset + sizeof(struct compat_xt_standard_target)) != next_offset)
return -EINVAL;
/* compat_xt_entry match has less strict aligment requirements,
* otherwise they are identical. In case of padding differences
* we need to add compat version of xt_check_entry_match.
*/
BUILD_BUG_ON(sizeof(struct compat_xt_entry_match) != sizeof(struct xt_entry_match));
return xt_check_entry_match(elems, base + target_offset,
__alignof__(struct compat_xt_entry_match));
}
EXPORT_SYMBOL(xt_compat_check_entry_offsets);
#endif /* CONFIG_COMPAT */
/**
* xt_check_entry_offsets - validate arp/ip/ip6t_entry
*
* @base: pointer to arp/ip/ip6t_entry
* @elems: pointer to first xt_entry_match, i.e. ip(6)t_entry->elems
* @target_offset: the arp/ip/ip6_t->target_offset
* @next_offset: the arp/ip/ip6_t->next_offset
*
* validates that target_offset and next_offset are sane and that all
* match sizes (if any) align with the target offset.
*
* This function does not validate the targets or matches themselves, it
* only tests that all the offsets and sizes are correct, that all
* match structures are aligned, and that the last structure ends where
* the target structure begins.
*
* Also see xt_compat_check_entry_offsets for CONFIG_COMPAT version.
*
* The arp/ip/ip6t_entry structure @base must have passed following tests:
* - it must point to a valid memory location
* - base to base + next_offset must be accessible, i.e. not exceed allocated
* length.
*
* A well-formed entry looks like this:
*
* ip(6)t_entry match [mtdata] match [mtdata] target [tgdata] ip(6)t_entry
* e->elems[]-----' | |
* matchsize | |
* matchsize | |
* | |
* target_offset---------------------------------' |
* next_offset---------------------------------------------------'
*
* elems[]: flexible array member at end of ip(6)/arpt_entry struct.
* This is where matches (if any) and the target reside.
* target_offset: beginning of target.
* next_offset: start of the next rule; also: size of this rule.
* Since targets have a minimum size, target_offset + minlen <= next_offset.
*
* Every match stores its size, sum of sizes must not exceed target_offset.
*
* Return: 0 on success, negative errno on failure.
*/
int xt_check_entry_offsets(const void *base,
const char *elems,
unsigned int target_offset,
unsigned int next_offset)
{
long size_of_base_struct = elems - (const char *)base;
const struct xt_entry_target *t;
const char *e = base;
/* target start is within the ip/ip6/arpt_entry struct */
if (target_offset < size_of_base_struct)
return -EINVAL;
if (target_offset + sizeof(*t) > next_offset)
return -EINVAL;
t = (void *)(e + target_offset);
if (t->u.target_size < sizeof(*t))
return -EINVAL;
if (target_offset + t->u.target_size > next_offset)
return -EINVAL;
if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 &&
XT_ALIGN(target_offset + sizeof(struct xt_standard_target)) != next_offset)
return -EINVAL;
return xt_check_entry_match(elems, base + target_offset,
__alignof__(struct xt_entry_match));
}
EXPORT_SYMBOL(xt_check_entry_offsets);
int xt_check_target(struct xt_tgchk_param *par,
unsigned int size, u_int8_t proto, bool inv_proto)
{
int ret;
if (XT_ALIGN(par->target->targetsize) != size) {
pr_err("%s_tables: %s.%u target: invalid size "
"%u (kernel) != (user) %u\n",
xt_prefix[par->family], par->target->name,
par->target->revision,
XT_ALIGN(par->target->targetsize), size);
return -EINVAL;
}
if (par->target->table != NULL &&
strcmp(par->target->table, par->table) != 0) {
pr_err("%s_tables: %s target: only valid in %s table, not %s\n",
xt_prefix[par->family], par->target->name,
par->target->table, par->table);
return -EINVAL;
}
if (par->target->hooks && (par->hook_mask & ~par->target->hooks) != 0) {
char used[64], allow[64];
pr_err("%s_tables: %s target: used from hooks %s, but only "
"usable from %s\n",
xt_prefix[par->family], par->target->name,
textify_hooks(used, sizeof(used), par->hook_mask,
par->family),
textify_hooks(allow, sizeof(allow), par->target->hooks,
par->family));
return -EINVAL;
}
if (par->target->proto && (par->target->proto != proto || inv_proto)) {
pr_err("%s_tables: %s target: only valid for protocol %u\n",
xt_prefix[par->family], par->target->name,
par->target->proto);
return -EINVAL;
}
if (par->target->checkentry != NULL) {
ret = par->target->checkentry(par);
if (ret < 0)
return ret;
else if (ret > 0)
/* Flag up potential errors. */
return -EIO;
}
return 0;
}
EXPORT_SYMBOL_GPL(xt_check_target);
/**
* xt_copy_counters_from_user - copy counters and metadata from userspace
*
* @user: src pointer to userspace memory
* @len: alleged size of userspace memory
* @info: where to store the xt_counters_info metadata
* @compat: true if we setsockopt call is done by 32bit task on 64bit kernel
*
* Copies counter meta data from @user and stores it in @info.
*
* vmallocs memory to hold the counters, then copies the counter data
* from @user to the new memory and returns a pointer to it.
*
* If @compat is true, @info gets converted automatically to the 64bit
* representation.
*
* The metadata associated with the counters is stored in @info.
*
* Return: returns pointer that caller has to test via IS_ERR().
* If IS_ERR is false, caller has to vfree the pointer.
*/
void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
struct xt_counters_info *info, bool compat)
{
void *mem;
u64 size;
#ifdef CONFIG_COMPAT
if (compat) {
/* structures only differ in size due to alignment */
struct compat_xt_counters_info compat_tmp;
if (len <= sizeof(compat_tmp))
return ERR_PTR(-EINVAL);
len -= sizeof(compat_tmp);
if (copy_from_user(&compat_tmp, user, sizeof(compat_tmp)) != 0)
return ERR_PTR(-EFAULT);
memcpy(info->name, compat_tmp.name, sizeof(info->name) - 1);
info->num_counters = compat_tmp.num_counters;
user += sizeof(compat_tmp);
} else
#endif
{
if (len <= sizeof(*info))
return ERR_PTR(-EINVAL);
len -= sizeof(*info);
if (copy_from_user(info, user, sizeof(*info)) != 0)
return ERR_PTR(-EFAULT);
user += sizeof(*info);
}
info->name[sizeof(info->name) - 1] = '\0';
size = sizeof(struct xt_counters);
size *= info->num_counters;
if (size != (u64)len)
return ERR_PTR(-EINVAL);
mem = vmalloc(len);
if (!mem)
return ERR_PTR(-ENOMEM);
if (copy_from_user(mem, user, len) == 0)
return mem;
vfree(mem);
return ERR_PTR(-EFAULT);
}
EXPORT_SYMBOL_GPL(xt_copy_counters_from_user);
#ifdef CONFIG_COMPAT
int xt_compat_target_offset(const struct xt_target *target)
{
u_int16_t csize = target->compatsize ? : target->targetsize;
return XT_ALIGN(target->targetsize) - COMPAT_XT_ALIGN(csize);
}
EXPORT_SYMBOL_GPL(xt_compat_target_offset);
void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
[NETFILTER]: {ip,arp,ip6}_tables: fix sparse warnings in compat code CHECK net/ipv4/netfilter/ip_tables.c net/ipv4/netfilter/ip_tables.c:1453:8: warning: incorrect type in argument 3 (different signedness) net/ipv4/netfilter/ip_tables.c:1453:8: expected int *size net/ipv4/netfilter/ip_tables.c:1453:8: got unsigned int [usertype] *size net/ipv4/netfilter/ip_tables.c:1458:44: warning: incorrect type in argument 3 (different signedness) net/ipv4/netfilter/ip_tables.c:1458:44: expected int *size net/ipv4/netfilter/ip_tables.c:1458:44: got unsigned int [usertype] *size net/ipv4/netfilter/ip_tables.c:1603:2: warning: incorrect type in argument 2 (different signedness) net/ipv4/netfilter/ip_tables.c:1603:2: expected unsigned int *i net/ipv4/netfilter/ip_tables.c:1603:2: got int *<noident> net/ipv4/netfilter/ip_tables.c:1627:8: warning: incorrect type in argument 3 (different signedness) net/ipv4/netfilter/ip_tables.c:1627:8: expected int *size net/ipv4/netfilter/ip_tables.c:1627:8: got unsigned int *size net/ipv4/netfilter/ip_tables.c:1634:40: warning: incorrect type in argument 3 (different signedness) net/ipv4/netfilter/ip_tables.c:1634:40: expected int *size net/ipv4/netfilter/ip_tables.c:1634:40: got unsigned int *size net/ipv4/netfilter/ip_tables.c:1653:8: warning: incorrect type in argument 5 (different signedness) net/ipv4/netfilter/ip_tables.c:1653:8: expected unsigned int *i net/ipv4/netfilter/ip_tables.c:1653:8: got int *<noident> net/ipv4/netfilter/ip_tables.c:1666:2: warning: incorrect type in argument 2 (different signedness) net/ipv4/netfilter/ip_tables.c:1666:2: expected unsigned int *i net/ipv4/netfilter/ip_tables.c:1666:2: got int *<noident> CHECK net/ipv4/netfilter/arp_tables.c net/ipv4/netfilter/arp_tables.c:1285:40: warning: incorrect type in argument 3 (different signedness) net/ipv4/netfilter/arp_tables.c:1285:40: expected int *size net/ipv4/netfilter/arp_tables.c:1285:40: got unsigned int *size net/ipv4/netfilter/arp_tables.c:1543:44: warning: incorrect type in argument 3 (different signedness) net/ipv4/netfilter/arp_tables.c:1543:44: expected int *size net/ipv4/netfilter/arp_tables.c:1543:44: got unsigned int [usertype] *size CHECK net/ipv6/netfilter/ip6_tables.c net/ipv6/netfilter/ip6_tables.c:1481:8: warning: incorrect type in argument 3 (different signedness) net/ipv6/netfilter/ip6_tables.c:1481:8: expected int *size net/ipv6/netfilter/ip6_tables.c:1481:8: got unsigned int [usertype] *size net/ipv6/netfilter/ip6_tables.c:1486:44: warning: incorrect type in argument 3 (different signedness) net/ipv6/netfilter/ip6_tables.c:1486:44: expected int *size net/ipv6/netfilter/ip6_tables.c:1486:44: got unsigned int [usertype] *size net/ipv6/netfilter/ip6_tables.c:1631:2: warning: incorrect type in argument 2 (different signedness) net/ipv6/netfilter/ip6_tables.c:1631:2: expected unsigned int *i net/ipv6/netfilter/ip6_tables.c:1631:2: got int *<noident> net/ipv6/netfilter/ip6_tables.c:1655:8: warning: incorrect type in argument 3 (different signedness) net/ipv6/netfilter/ip6_tables.c:1655:8: expected int *size net/ipv6/netfilter/ip6_tables.c:1655:8: got unsigned int *size net/ipv6/netfilter/ip6_tables.c:1662:40: warning: incorrect type in argument 3 (different signedness) net/ipv6/netfilter/ip6_tables.c:1662:40: expected int *size net/ipv6/netfilter/ip6_tables.c:1662:40: got unsigned int *size net/ipv6/netfilter/ip6_tables.c:1680:8: warning: incorrect type in argument 5 (different signedness) net/ipv6/netfilter/ip6_tables.c:1680:8: expected unsigned int *i net/ipv6/netfilter/ip6_tables.c:1680:8: got int *<noident> net/ipv6/netfilter/ip6_tables.c:1693:2: warning: incorrect type in argument 2 (different signedness) net/ipv6/netfilter/ip6_tables.c:1693:2: expected unsigned int *i net/ipv6/netfilter/ip6_tables.c:1693:2: got int *<noident> Signed-off-by: Patrick McHardy <kaber@trash.net> Signed-off-by: David S. Miller <davem@davemloft.net>
2008-01-31 12:10:18 +00:00
unsigned int *size)
{
const struct xt_target *target = t->u.kernel.target;
struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t;
int pad, off = xt_compat_target_offset(target);
u_int16_t tsize = ct->u.user.target_size;
netfilter: x_tables: do compat validation via translate_table commit 09d9686047dbbe1cf4faa558d3ecc4aae2046054 upstream. This looks like refactoring, but its also a bug fix. Problem is that the compat path (32bit iptables, 64bit kernel) lacks a few sanity tests that are done in the normal path. For example, we do not check for underflows and the base chain policies. While its possible to also add such checks to the compat path, its more copy&pastry, for instance we cannot reuse check_underflow() helper as e->target_offset differs in the compat case. Other problem is that it makes auditing for validation errors harder; two places need to be checked and kept in sync. At a high level 32 bit compat works like this: 1- initial pass over blob: validate match/entry offsets, bounds checking lookup all matches and targets do bookkeeping wrt. size delta of 32/64bit structures assign match/target.u.kernel pointer (points at kernel implementation, needed to access ->compatsize etc.) 2- allocate memory according to the total bookkeeping size to contain the translated ruleset 3- second pass over original blob: for each entry, copy the 32bit representation to the newly allocated memory. This also does any special match translations (e.g. adjust 32bit to 64bit longs, etc). 4- check if ruleset is free of loops (chase all jumps) 5-first pass over translated blob: call the checkentry function of all matches and targets. The alternative implemented by this patch is to drop steps 3&4 from the compat process, the translation is changed into an intermediate step rather than a full 1:1 translate_table replacement. In the 2nd pass (step #3), change the 64bit ruleset back to a kernel representation, i.e. put() the kernel pointer and restore ->u.user.name . This gets us a 64bit ruleset that is in the format generated by a 64bit iptables userspace -- we can then use translate_table() to get the 'native' sanity checks. This has two drawbacks: 1. we re-validate all the match and target entry structure sizes even though compat translation is supposed to never generate bogus offsets. 2. we put and then re-lookup each match and target. THe upside is that we get all sanity tests and ruleset validations provided by the normal path and can remove some duplicated compat code. iptables-restore time of autogenerated ruleset with 300k chains of form -A CHAIN0001 -m limit --limit 1/s -j CHAIN0002 -A CHAIN0002 -m limit --limit 1/s -j CHAIN0003 shows no noticeable differences in restore times: old: 0m30.796s new: 0m31.521s 64bit: 0m25.674s Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> Signed-off-by: Willy Tarreau <w@1wt.eu>
2016-04-01 12:17:34 +00:00
char name[sizeof(t->u.user.name)];
t = *dstptr;
memcpy(t, ct, sizeof(*ct));
if (target->compat_from_user)
target->compat_from_user(t->data, ct->data);
else
memcpy(t->data, ct->data, tsize - sizeof(*ct));
pad = XT_ALIGN(target->targetsize) - target->targetsize;
if (pad > 0)
memset(t->data + target->targetsize, 0, pad);
tsize += off;
t->u.user.target_size = tsize;
netfilter: x_tables: do compat validation via translate_table commit 09d9686047dbbe1cf4faa558d3ecc4aae2046054 upstream. This looks like refactoring, but its also a bug fix. Problem is that the compat path (32bit iptables, 64bit kernel) lacks a few sanity tests that are done in the normal path. For example, we do not check for underflows and the base chain policies. While its possible to also add such checks to the compat path, its more copy&pastry, for instance we cannot reuse check_underflow() helper as e->target_offset differs in the compat case. Other problem is that it makes auditing for validation errors harder; two places need to be checked and kept in sync. At a high level 32 bit compat works like this: 1- initial pass over blob: validate match/entry offsets, bounds checking lookup all matches and targets do bookkeeping wrt. size delta of 32/64bit structures assign match/target.u.kernel pointer (points at kernel implementation, needed to access ->compatsize etc.) 2- allocate memory according to the total bookkeeping size to contain the translated ruleset 3- second pass over original blob: for each entry, copy the 32bit representation to the newly allocated memory. This also does any special match translations (e.g. adjust 32bit to 64bit longs, etc). 4- check if ruleset is free of loops (chase all jumps) 5-first pass over translated blob: call the checkentry function of all matches and targets. The alternative implemented by this patch is to drop steps 3&4 from the compat process, the translation is changed into an intermediate step rather than a full 1:1 translate_table replacement. In the 2nd pass (step #3), change the 64bit ruleset back to a kernel representation, i.e. put() the kernel pointer and restore ->u.user.name . This gets us a 64bit ruleset that is in the format generated by a 64bit iptables userspace -- we can then use translate_table() to get the 'native' sanity checks. This has two drawbacks: 1. we re-validate all the match and target entry structure sizes even though compat translation is supposed to never generate bogus offsets. 2. we put and then re-lookup each match and target. THe upside is that we get all sanity tests and ruleset validations provided by the normal path and can remove some duplicated compat code. iptables-restore time of autogenerated ruleset with 300k chains of form -A CHAIN0001 -m limit --limit 1/s -j CHAIN0002 -A CHAIN0002 -m limit --limit 1/s -j CHAIN0003 shows no noticeable differences in restore times: old: 0m30.796s new: 0m31.521s 64bit: 0m25.674s Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> Signed-off-by: Willy Tarreau <w@1wt.eu>
2016-04-01 12:17:34 +00:00
strlcpy(name, target->name, sizeof(name));
module_put(target->me);
strncpy(t->u.user.name, name, sizeof(t->u.user.name));
*size += off;
*dstptr += tsize;
}
EXPORT_SYMBOL_GPL(xt_compat_target_from_user);
int xt_compat_target_to_user(const struct xt_entry_target *t,
void __user **dstptr, unsigned int *size)
{
const struct xt_target *target = t->u.kernel.target;
struct compat_xt_entry_target __user *ct = *dstptr;
int off = xt_compat_target_offset(target);
u_int16_t tsize = t->u.user.target_size - off;
if (copy_to_user(ct, t, sizeof(*ct)) ||
put_user(tsize, &ct->u.user.target_size) ||
copy_to_user(ct->u.user.name, t->u.kernel.target->name,
strlen(t->u.kernel.target->name) + 1))
return -EFAULT;
if (target->compat_to_user) {
if (target->compat_to_user((void __user *)ct->data, t->data))
return -EFAULT;
} else {
if (copy_to_user(ct->data, t->data, tsize - sizeof(*ct)))
return -EFAULT;
}
*size -= off;
*dstptr += tsize;
return 0;
}
EXPORT_SYMBOL_GPL(xt_compat_target_to_user);
#endif
struct xt_table_info *xt_alloc_table_info(unsigned int size)
{
struct xt_table_info *newinfo;
int cpu;
/* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > totalram_pages)
return NULL;
newinfo = kzalloc(XT_TABLE_INFO_SZ, GFP_KERNEL);
if (!newinfo)
return NULL;
newinfo->size = size;
for_each_possible_cpu(cpu) {
if (size <= PAGE_SIZE)
newinfo->entries[cpu] = kmalloc_node(size,
GFP_KERNEL,
cpu_to_node(cpu));
else
newinfo->entries[cpu] = vmalloc_node(size,
cpu_to_node(cpu));
if (newinfo->entries[cpu] == NULL) {
xt_free_table_info(newinfo);
return NULL;
}
}
return newinfo;
}
EXPORT_SYMBOL(xt_alloc_table_info);
void xt_free_table_info(struct xt_table_info *info)
{
int cpu;
for_each_possible_cpu(cpu) {
if (info->size <= PAGE_SIZE)
kfree(info->entries[cpu]);
else
vfree(info->entries[cpu]);
}
if (info->jumpstack != NULL) {
if (sizeof(void *) * info->stacksize > PAGE_SIZE) {
for_each_possible_cpu(cpu)
vfree(info->jumpstack[cpu]);
} else {
for_each_possible_cpu(cpu)
kfree(info->jumpstack[cpu]);
}
}
if (sizeof(void **) * nr_cpu_ids > PAGE_SIZE)
vfree(info->jumpstack);
else
kfree(info->jumpstack);
free_percpu(info->stackptr);
kfree(info);
}
EXPORT_SYMBOL(xt_free_table_info);
/* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */
struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
const char *name)
{
struct xt_table *t;
if (mutex_lock_interruptible(&xt[af].mutex) != 0)
return ERR_PTR(-EINTR);
list_for_each_entry(t, &net->xt.tables[af], list)
if (strcmp(t->name, name) == 0 && try_module_get(t->me))
return t;
mutex_unlock(&xt[af].mutex);
return NULL;
}
EXPORT_SYMBOL_GPL(xt_find_table_lock);
void xt_table_unlock(struct xt_table *table)
{
mutex_unlock(&xt[table->af].mutex);
}
EXPORT_SYMBOL_GPL(xt_table_unlock);
#ifdef CONFIG_COMPAT
void xt_compat_lock(u_int8_t af)
{
mutex_lock(&xt[af].compat_mutex);
}
EXPORT_SYMBOL_GPL(xt_compat_lock);
void xt_compat_unlock(u_int8_t af)
{
mutex_unlock(&xt[af].compat_mutex);
}
EXPORT_SYMBOL_GPL(xt_compat_unlock);
#endif
DEFINE_PER_CPU(seqcount_t, xt_recseq);
EXPORT_PER_CPU_SYMBOL_GPL(xt_recseq);
static int xt_jumpstack_alloc(struct xt_table_info *i)
{
unsigned int size;
int cpu;
i->stackptr = alloc_percpu(unsigned int);
if (i->stackptr == NULL)
return -ENOMEM;
size = sizeof(void **) * nr_cpu_ids;
if (size > PAGE_SIZE)
i->jumpstack = vzalloc(size);
else
i->jumpstack = kzalloc(size, GFP_KERNEL);
if (i->jumpstack == NULL)
return -ENOMEM;
i->stacksize *= xt_jumpstack_multiplier;
size = sizeof(void *) * i->stacksize;
for_each_possible_cpu(cpu) {
if (size > PAGE_SIZE)
i->jumpstack[cpu] = vmalloc_node(size,
cpu_to_node(cpu));
else
i->jumpstack[cpu] = kmalloc_node(size,
GFP_KERNEL, cpu_to_node(cpu));
if (i->jumpstack[cpu] == NULL)
/*
* Freeing will be done later on by the callers. The
* chain is: xt_replace_table -> __do_replace ->
* do_replace -> xt_free_table_info.
*/
return -ENOMEM;
}
return 0;
}
struct xt_table_info *
xt_replace_table(struct xt_table *table,
unsigned int num_counters,
struct xt_table_info *newinfo,
int *error)
{
struct xt_table_info *private;
int ret;
ret = xt_jumpstack_alloc(newinfo);
if (ret < 0) {
*error = ret;
return NULL;
}
/* Do the substitution. */
local_bh_disable();
private = table->private;
/* Check inside lock: is the old number correct? */
if (num_counters != private->number) {
pr_debug("num_counters != table->private->number (%u/%u)\n",
num_counters, private->number);
local_bh_enable();
*error = -EAGAIN;
return NULL;
}
newinfo->initial_entries = private->initial_entries;
netfilter: x_tables: fix ordering of jumpstack allocation and table update During kernel stability testing on an SMP ARMv7 system, Yalin Wang reported the following panic from the netfilter code: 1fe0: 0000001c 5e2d3b10 4007e779 4009e110 60000010 00000032 ff565656 ff545454 [<c06c48dc>] (ipt_do_table+0x448/0x584) from [<c0655ef0>] (nf_iterate+0x48/0x7c) [<c0655ef0>] (nf_iterate+0x48/0x7c) from [<c0655f7c>] (nf_hook_slow+0x58/0x104) [<c0655f7c>] (nf_hook_slow+0x58/0x104) from [<c0683bbc>] (ip_local_deliver+0x88/0xa8) [<c0683bbc>] (ip_local_deliver+0x88/0xa8) from [<c0683718>] (ip_rcv_finish+0x418/0x43c) [<c0683718>] (ip_rcv_finish+0x418/0x43c) from [<c062b1c4>] (__netif_receive_skb+0x4cc/0x598) [<c062b1c4>] (__netif_receive_skb+0x4cc/0x598) from [<c062b314>] (process_backlog+0x84/0x158) [<c062b314>] (process_backlog+0x84/0x158) from [<c062de84>] (net_rx_action+0x70/0x1dc) [<c062de84>] (net_rx_action+0x70/0x1dc) from [<c0088230>] (__do_softirq+0x11c/0x27c) [<c0088230>] (__do_softirq+0x11c/0x27c) from [<c008857c>] (do_softirq+0x44/0x50) [<c008857c>] (do_softirq+0x44/0x50) from [<c0088614>] (local_bh_enable_ip+0x8c/0xd0) [<c0088614>] (local_bh_enable_ip+0x8c/0xd0) from [<c06b0330>] (inet_stream_connect+0x164/0x298) [<c06b0330>] (inet_stream_connect+0x164/0x298) from [<c061d68c>] (sys_connect+0x88/0xc8) [<c061d68c>] (sys_connect+0x88/0xc8) from [<c000e340>] (ret_fast_syscall+0x0/0x30) Code: 2a000021 e59d2028 e59de01c e59f011c (e7824103) ---[ end trace da227214a82491bd ]--- Kernel panic - not syncing: Fatal exception in interrupt This comes about because CPU1 is executing xt_replace_table in response to a setsockopt syscall, resulting in: ret = xt_jumpstack_alloc(newinfo); --> newinfo->jumpstack = kzalloc(size, GFP_KERNEL); [...] table->private = newinfo; newinfo->initial_entries = private->initial_entries; Meanwhile, CPU0 is handling the network receive path and ends up in ipt_do_table, resulting in: private = table->private; [...] jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; On weakly ordered memory architectures, the writes to table->private and newinfo->jumpstack from CPU1 can be observed out of order by CPU0. Furthermore, on architectures which don't respect ordering of address dependencies (i.e. Alpha), the reads from CPU0 can also be re-ordered. This patch adds an smp_wmb() before the assignment to table->private (which is essentially publishing newinfo) to ensure that all writes to newinfo will be observed before plugging it into the table structure. A dependent-read barrier is also added on the consumer sides, to ensure the same ordering requirements are also respected there. Change-Id: Ia320d52510d7184c0f13d7f130102dbe685e8d6f Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Reported-by: Wang, Yalin <Yalin.Wang@sonymobile.com> Tested-by: Wang, Yalin <Yalin.Wang@sonymobile.com> Signed-off-by: Will Deacon <will.deacon@arm.com> Acked-by: Eric Dumazet <edumazet@google.com> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> Git-commit: b416c144f46af1a30ddfa4e4319a8f077381ad63 Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git Signed-off-by: Osvaldo Banuelos <osvaldob@codeaurora.org>
2013-10-21 12:14:53 +00:00
/*
* Ensure contents of newinfo are visible before assigning to
* private.
*/
smp_wmb();
table->private = newinfo;
/*
* Even though table entries have now been swapped, other CPU's
* may still be using the old entries. This is okay, because
* resynchronization happens because of the locking done
* during the get_counters() routine.
*/
local_bh_enable();
#ifdef CONFIG_AUDIT
if (audit_enabled) {
struct audit_buffer *ab;
ab = audit_log_start(current->audit_context, GFP_KERNEL,
AUDIT_NETFILTER_CFG);
if (ab) {
audit_log_format(ab, "table=%s family=%u entries=%u",
table->name, table->af,
private->number);
audit_log_end(ab);
}
}
#endif
return private;
}
EXPORT_SYMBOL_GPL(xt_replace_table);
struct xt_table *xt_register_table(struct net *net,
const struct xt_table *input_table,
struct xt_table_info *bootstrap,
struct xt_table_info *newinfo)
{
int ret;
struct xt_table_info *private;
struct xt_table *t, *table;
/* Don't add one object to multiple lists. */
table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL);
if (!table) {
ret = -ENOMEM;
goto out;
}
ret = mutex_lock_interruptible(&xt[table->af].mutex);
if (ret != 0)
goto out_free;
/* Don't autoload: we'd eat our tail... */
list_for_each_entry(t, &net->xt.tables[table->af], list) {
if (strcmp(t->name, table->name) == 0) {
ret = -EEXIST;
goto unlock;
}
}
/* Simplifies replace_table code. */
table->private = bootstrap;
if (!xt_replace_table(table, 0, newinfo, &ret))
goto unlock;
private = table->private;
pr_debug("table->private->number = %u\n", private->number);
/* save number of initial entries */
private->initial_entries = private->number;
list_add(&table->list, &net->xt.tables[table->af]);
mutex_unlock(&xt[table->af].mutex);
return table;
unlock:
mutex_unlock(&xt[table->af].mutex);
out_free:
kfree(table);
out:
return ERR_PTR(ret);
}
EXPORT_SYMBOL_GPL(xt_register_table);
void *xt_unregister_table(struct xt_table *table)
{
struct xt_table_info *private;
mutex_lock(&xt[table->af].mutex);
private = table->private;
list_del(&table->list);
mutex_unlock(&xt[table->af].mutex);
kfree(table);
return private;
}
EXPORT_SYMBOL_GPL(xt_unregister_table);
#ifdef CONFIG_PROC_FS
struct xt_names_priv {
struct seq_net_private p;
u_int8_t af;
};
static void *xt_table_seq_start(struct seq_file *seq, loff_t *pos)
{
struct xt_names_priv *priv = seq->private;
struct net *net = seq_file_net(seq);
u_int8_t af = priv->af;
mutex_lock(&xt[af].mutex);
return seq_list_start(&net->xt.tables[af], *pos);
}
static void *xt_table_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct xt_names_priv *priv = seq->private;
struct net *net = seq_file_net(seq);
u_int8_t af = priv->af;
return seq_list_next(v, &net->xt.tables[af], pos);
}
static void xt_table_seq_stop(struct seq_file *seq, void *v)
{
struct xt_names_priv *priv = seq->private;
u_int8_t af = priv->af;
mutex_unlock(&xt[af].mutex);
}
static int xt_table_seq_show(struct seq_file *seq, void *v)
{
struct xt_table *table = list_entry(v, struct xt_table, list);
if (strlen(table->name))
return seq_printf(seq, "%s\n", table->name);
else
return 0;
}
static const struct seq_operations xt_table_seq_ops = {
.start = xt_table_seq_start,
.next = xt_table_seq_next,
.stop = xt_table_seq_stop,
.show = xt_table_seq_show,
};
static int xt_table_open(struct inode *inode, struct file *file)
{
int ret;
struct xt_names_priv *priv;
ret = seq_open_net(inode, file, &xt_table_seq_ops,
sizeof(struct xt_names_priv));
if (!ret) {
priv = ((struct seq_file *)file->private_data)->private;
priv->af = (unsigned long)PDE_DATA(inode);
}
return ret;
}
static const struct file_operations xt_table_ops = {
.owner = THIS_MODULE,
.open = xt_table_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release_net,
};
/*
* Traverse state for ip{,6}_{tables,matches} for helping crossing
* the multi-AF mutexes.
*/
struct nf_mttg_trav {
struct list_head *head, *curr;
uint8_t class, nfproto;
};
enum {
MTTG_TRAV_INIT,
MTTG_TRAV_NFP_UNSPEC,
MTTG_TRAV_NFP_SPEC,
MTTG_TRAV_DONE,
};
static void *xt_mttg_seq_next(struct seq_file *seq, void *v, loff_t *ppos,
bool is_target)
{
static const uint8_t next_class[] = {
[MTTG_TRAV_NFP_UNSPEC] = MTTG_TRAV_NFP_SPEC,
[MTTG_TRAV_NFP_SPEC] = MTTG_TRAV_DONE,
};
struct nf_mttg_trav *trav = seq->private;
switch (trav->class) {
case MTTG_TRAV_INIT:
trav->class = MTTG_TRAV_NFP_UNSPEC;
mutex_lock(&xt[NFPROTO_UNSPEC].mutex);
trav->head = trav->curr = is_target ?
&xt[NFPROTO_UNSPEC].target : &xt[NFPROTO_UNSPEC].match;
break;
case MTTG_TRAV_NFP_UNSPEC:
trav->curr = trav->curr->next;
if (trav->curr != trav->head)
break;
mutex_unlock(&xt[NFPROTO_UNSPEC].mutex);
mutex_lock(&xt[trav->nfproto].mutex);
trav->head = trav->curr = is_target ?
&xt[trav->nfproto].target : &xt[trav->nfproto].match;
trav->class = next_class[trav->class];
break;
case MTTG_TRAV_NFP_SPEC:
trav->curr = trav->curr->next;
if (trav->curr != trav->head)
break;
/* fallthru, _stop will unlock */
default:
return NULL;
}
if (ppos != NULL)
++*ppos;
return trav;
}
static void *xt_mttg_seq_start(struct seq_file *seq, loff_t *pos,
bool is_target)
{
struct nf_mttg_trav *trav = seq->private;
unsigned int j;
trav->class = MTTG_TRAV_INIT;
for (j = 0; j < *pos; ++j)
if (xt_mttg_seq_next(seq, NULL, NULL, is_target) == NULL)
return NULL;
return trav;
}
static void xt_mttg_seq_stop(struct seq_file *seq, void *v)
{
struct nf_mttg_trav *trav = seq->private;
switch (trav->class) {
case MTTG_TRAV_NFP_UNSPEC:
mutex_unlock(&xt[NFPROTO_UNSPEC].mutex);
break;
case MTTG_TRAV_NFP_SPEC:
mutex_unlock(&xt[trav->nfproto].mutex);
break;
}
}
static void *xt_match_seq_start(struct seq_file *seq, loff_t *pos)
{
return xt_mttg_seq_start(seq, pos, false);
}
static void *xt_match_seq_next(struct seq_file *seq, void *v, loff_t *ppos)
{
return xt_mttg_seq_next(seq, v, ppos, false);
}
static int xt_match_seq_show(struct seq_file *seq, void *v)
{
const struct nf_mttg_trav *trav = seq->private;
const struct xt_match *match;
switch (trav->class) {
case MTTG_TRAV_NFP_UNSPEC:
case MTTG_TRAV_NFP_SPEC:
if (trav->curr == trav->head)
return 0;
match = list_entry(trav->curr, struct xt_match, list);
return (*match->name == '\0') ? 0 :
seq_printf(seq, "%s\n", match->name);
}
return 0;
}
static const struct seq_operations xt_match_seq_ops = {
.start = xt_match_seq_start,
.next = xt_match_seq_next,
.stop = xt_mttg_seq_stop,
.show = xt_match_seq_show,
};
static int xt_match_open(struct inode *inode, struct file *file)
{
struct seq_file *seq;
struct nf_mttg_trav *trav;
int ret;
trav = kmalloc(sizeof(*trav), GFP_KERNEL);
if (trav == NULL)
return -ENOMEM;
ret = seq_open(file, &xt_match_seq_ops);
if (ret < 0) {
kfree(trav);
return ret;
}
seq = file->private_data;
seq->private = trav;
trav->nfproto = (unsigned long)PDE_DATA(inode);
return 0;
}
static const struct file_operations xt_match_ops = {
.owner = THIS_MODULE,
.open = xt_match_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release_private,
};
static void *xt_target_seq_start(struct seq_file *seq, loff_t *pos)
{
return xt_mttg_seq_start(seq, pos, true);
}
static void *xt_target_seq_next(struct seq_file *seq, void *v, loff_t *ppos)
{
return xt_mttg_seq_next(seq, v, ppos, true);
}
static int xt_target_seq_show(struct seq_file *seq, void *v)
{
const struct nf_mttg_trav *trav = seq->private;
const struct xt_target *target;
switch (trav->class) {
case MTTG_TRAV_NFP_UNSPEC:
case MTTG_TRAV_NFP_SPEC:
if (trav->curr == trav->head)
return 0;
target = list_entry(trav->curr, struct xt_target, list);
return (*target->name == '\0') ? 0 :
seq_printf(seq, "%s\n", target->name);
}
return 0;
}
static const struct seq_operations xt_target_seq_ops = {
.start = xt_target_seq_start,
.next = xt_target_seq_next,
.stop = xt_mttg_seq_stop,
.show = xt_target_seq_show,
};
static int xt_target_open(struct inode *inode, struct file *file)
{
struct seq_file *seq;
struct nf_mttg_trav *trav;
int ret;
trav = kmalloc(sizeof(*trav), GFP_KERNEL);
if (trav == NULL)
return -ENOMEM;
ret = seq_open(file, &xt_target_seq_ops);
if (ret < 0) {
kfree(trav);
return ret;
}
seq = file->private_data;
seq->private = trav;
trav->nfproto = (unsigned long)PDE_DATA(inode);
return 0;
}
static const struct file_operations xt_target_ops = {
.owner = THIS_MODULE,
.open = xt_target_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release_private,
};
#define FORMAT_TABLES "_tables_names"
#define FORMAT_MATCHES "_tables_matches"
#define FORMAT_TARGETS "_tables_targets"
#endif /* CONFIG_PROC_FS */
/**
* xt_hook_link - set up hooks for a new table
* @table: table with metadata needed to set up hooks
* @fn: Hook function
*
* This function will take care of creating and registering the necessary
* Netfilter hooks for XT tables.
*/
struct nf_hook_ops *xt_hook_link(const struct xt_table *table, nf_hookfn *fn)
{
unsigned int hook_mask = table->valid_hooks;
uint8_t i, num_hooks = hweight32(hook_mask);
uint8_t hooknum;
struct nf_hook_ops *ops;
int ret;
ops = kmalloc(sizeof(*ops) * num_hooks, GFP_KERNEL);
if (ops == NULL)
return ERR_PTR(-ENOMEM);
for (i = 0, hooknum = 0; i < num_hooks && hook_mask != 0;
hook_mask >>= 1, ++hooknum) {
if (!(hook_mask & 1))
continue;
ops[i].hook = fn;
ops[i].owner = table->me;
ops[i].pf = table->af;
ops[i].hooknum = hooknum;
ops[i].priority = table->priority;
++i;
}
ret = nf_register_hooks(ops, num_hooks);
if (ret < 0) {
kfree(ops);
return ERR_PTR(ret);
}
return ops;
}
EXPORT_SYMBOL_GPL(xt_hook_link);
/**
* xt_hook_unlink - remove hooks for a table
* @ops: nf_hook_ops array as returned by nf_hook_link
* @hook_mask: the very same mask that was passed to nf_hook_link
*/
void xt_hook_unlink(const struct xt_table *table, struct nf_hook_ops *ops)
{
nf_unregister_hooks(ops, hweight32(table->valid_hooks));
kfree(ops);
}
EXPORT_SYMBOL_GPL(xt_hook_unlink);
int xt_proto_init(struct net *net, u_int8_t af)
{
#ifdef CONFIG_PROC_FS
char buf[XT_FUNCTION_MAXNAMELEN];
struct proc_dir_entry *proc;
#endif
if (af >= ARRAY_SIZE(xt_prefix))
return -EINVAL;
#ifdef CONFIG_PROC_FS
strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_TABLES, sizeof(buf));
proc = proc_create_data(buf, 0440, net->proc_net, &xt_table_ops,
(void *)(unsigned long)af);
if (!proc)
goto out;
strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_MATCHES, sizeof(buf));
proc = proc_create_data(buf, 0440, net->proc_net, &xt_match_ops,
(void *)(unsigned long)af);
if (!proc)
goto out_remove_tables;
strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_TARGETS, sizeof(buf));
proc = proc_create_data(buf, 0440, net->proc_net, &xt_target_ops,
(void *)(unsigned long)af);
if (!proc)
goto out_remove_matches;
#endif
return 0;
#ifdef CONFIG_PROC_FS
out_remove_matches:
strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_MATCHES, sizeof(buf));
remove_proc_entry(buf, net->proc_net);
out_remove_tables:
strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_TABLES, sizeof(buf));
remove_proc_entry(buf, net->proc_net);
out:
return -1;
#endif
}
EXPORT_SYMBOL_GPL(xt_proto_init);
void xt_proto_fini(struct net *net, u_int8_t af)
{
#ifdef CONFIG_PROC_FS
char buf[XT_FUNCTION_MAXNAMELEN];
strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_TABLES, sizeof(buf));
remove_proc_entry(buf, net->proc_net);
strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_TARGETS, sizeof(buf));
remove_proc_entry(buf, net->proc_net);
strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_MATCHES, sizeof(buf));
remove_proc_entry(buf, net->proc_net);
#endif /*CONFIG_PROC_FS*/
}
EXPORT_SYMBOL_GPL(xt_proto_fini);
static int __net_init xt_net_init(struct net *net)
{
int i;
for (i = 0; i < NFPROTO_NUMPROTO; i++)
INIT_LIST_HEAD(&net->xt.tables[i]);
return 0;
}
static struct pernet_operations xt_net_ops = {
.init = xt_net_init,
};
static int __init xt_init(void)
{
unsigned int i;
int rv;
for_each_possible_cpu(i) {
seqcount_init(&per_cpu(xt_recseq, i));
}
netfilter: compat: initialize all fields in xt_init commit 8d29d16d21342a0c86405d46de0c4ac5daf1760f upstream If a non zero value happens to be in xt[NFPROTO_BRIDGE].cur at init time, the following panic can be caused by running % ebtables -t broute -F BROUTING from a 32-bit user level on a 64-bit kernel. This patch replaces kmalloc_array with kcalloc when allocating xt. [ 474.680846] BUG: unable to handle kernel paging request at 0000000009600920 [ 474.687869] PGD 2037006067 P4D 2037006067 PUD 2038938067 PMD 0 [ 474.693838] Oops: 0000 [#1] SMP [ 474.697055] CPU: 9 PID: 4662 Comm: ebtables Kdump: loaded Not tainted 4.19.17-11302235.AroraKernelnext.fc18.x86_64 #1 [ 474.707721] Hardware name: Supermicro X9DRT/X9DRT, BIOS 3.0 06/28/2013 [ 474.714313] RIP: 0010:xt_compat_calc_jump+0x2f/0x63 [x_tables] [ 474.720201] Code: 40 0f b6 ff 55 31 c0 48 6b ff 70 48 03 3d dc 45 00 00 48 89 e5 8b 4f 6c 4c 8b 47 60 ff c9 39 c8 7f 2f 8d 14 08 d1 fa 48 63 fa <41> 39 34 f8 4c 8d 0c fd 00 00 00 00 73 05 8d 42 01 eb e1 76 05 8d [ 474.739023] RSP: 0018:ffffc9000943fc58 EFLAGS: 00010207 [ 474.744296] RAX: 0000000000000000 RBX: ffffc90006465000 RCX: 0000000002580249 [ 474.751485] RDX: 00000000012c0124 RSI: fffffffff7be17e9 RDI: 00000000012c0124 [ 474.758670] RBP: ffffc9000943fc58 R08: 0000000000000000 R09: ffffffff8117cf8f [ 474.765855] R10: ffffc90006477000 R11: 0000000000000000 R12: 0000000000000001 [ 474.773048] R13: 0000000000000000 R14: ffffc9000943fcb8 R15: ffffc9000943fcb8 [ 474.780234] FS: 0000000000000000(0000) GS:ffff88a03f840000(0063) knlGS:00000000f7ac7700 [ 474.788612] CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033 [ 474.794632] CR2: 0000000009600920 CR3: 0000002037422006 CR4: 00000000000606e0 [ 474.802052] Call Trace: [ 474.804789] compat_do_replace+0x1fb/0x2a3 [ebtables] [ 474.810105] compat_do_ebt_set_ctl+0x69/0xe6 [ebtables] [ 474.815605] ? try_module_get+0x37/0x42 [ 474.819716] compat_nf_setsockopt+0x4f/0x6d [ 474.824172] compat_ip_setsockopt+0x7e/0x8c [ 474.828641] compat_raw_setsockopt+0x16/0x3a [ 474.833220] compat_sock_common_setsockopt+0x1d/0x24 [ 474.838458] __compat_sys_setsockopt+0x17e/0x1b1 [ 474.843343] ? __check_object_size+0x76/0x19a [ 474.847960] __ia32_compat_sys_socketcall+0x1cb/0x25b [ 474.853276] do_fast_syscall_32+0xaf/0xf6 [ 474.857548] entry_SYSENTER_compat+0x6b/0x7a Signed-off-by: Francesco Ruggeri <fruggeri@arista.com> Acked-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> Signed-off-by: Zubin Mithra <zsm@chromium.org> Signed-off-by: Sasha Levin <sashal@kernel.org>
2019-05-10 16:19:30 +00:00
xt = kcalloc(NFPROTO_NUMPROTO, sizeof(struct xt_af), GFP_KERNEL);
if (!xt)
return -ENOMEM;
for (i = 0; i < NFPROTO_NUMPROTO; i++) {
mutex_init(&xt[i].mutex);
#ifdef CONFIG_COMPAT
mutex_init(&xt[i].compat_mutex);
xt[i].compat_tab = NULL;
#endif
INIT_LIST_HEAD(&xt[i].target);
INIT_LIST_HEAD(&xt[i].match);
}
rv = register_pernet_subsys(&xt_net_ops);
if (rv < 0)
kfree(xt);
return rv;
}
static void __exit xt_fini(void)
{
unregister_pernet_subsys(&xt_net_ops);
kfree(xt);
}
module_init(xt_init);
module_exit(xt_fini);