android_kernel_samsung_msm8976/net/sched/cls_api.c

631 lines
14 KiB
C
Raw Normal View History

/*
* net/sched/cls_api.c Packet classifier API.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
* Changes:
*
* Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
*
*/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/err.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 08:04:11 +00:00
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
/* The list of all installed classifier types */
static struct tcf_proto_ops *tcf_proto_base __read_mostly;
/* Protects list of registered TC modules. It is pure SMP lock. */
static DEFINE_RWLOCK(cls_mod_lock);
/* Find classifier type by string name */
static const struct tcf_proto_ops *tcf_proto_lookup_ops(struct nlattr *kind)
{
const struct tcf_proto_ops *t = NULL;
if (kind) {
read_lock(&cls_mod_lock);
for (t = tcf_proto_base; t; t = t->next) {
if (nla_strcmp(kind, t->kind) == 0) {
if (!try_module_get(t->owner))
t = NULL;
break;
}
}
read_unlock(&cls_mod_lock);
}
return t;
}
/* Register(unregister) new classifier type */
int register_tcf_proto_ops(struct tcf_proto_ops *ops)
{
struct tcf_proto_ops *t, **tp;
int rc = -EEXIST;
write_lock(&cls_mod_lock);
for (tp = &tcf_proto_base; (t = *tp) != NULL; tp = &t->next)
if (!strcmp(ops->kind, t->kind))
goto out;
ops->next = NULL;
*tp = ops;
rc = 0;
out:
write_unlock(&cls_mod_lock);
return rc;
}
EXPORT_SYMBOL(register_tcf_proto_ops);
int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
{
struct tcf_proto_ops *t, **tp;
int rc = -ENOENT;
write_lock(&cls_mod_lock);
for (tp = &tcf_proto_base; (t = *tp) != NULL; tp = &t->next)
if (t == ops)
break;
if (!t)
goto out;
*tp = t->next;
rc = 0;
out:
write_unlock(&cls_mod_lock);
return rc;
}
EXPORT_SYMBOL(unregister_tcf_proto_ops);
static int tfilter_notify(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n, struct tcf_proto *tp,
unsigned long fh, int event);
/* Select new prio value from the range, managed by kernel. */
static inline u32 tcf_auto_prio(struct tcf_proto *tp)
{
u32 first = TC_H_MAKE(0xC0000000U, 0U);
if (tp)
first = tp->prio - 1;
return first;
}
/* Add/change/delete/get a filter node */
static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tca[TCA_MAX + 1];
spinlock_t *root_lock;
struct tcmsg *t;
u32 protocol;
u32 prio;
u32 nprio;
u32 parent;
struct net_device *dev;
struct Qdisc *q;
struct tcf_proto **back, **chain;
struct tcf_proto *tp;
const struct tcf_proto_ops *tp_ops;
const struct Qdisc_class_ops *cops;
unsigned long cl;
unsigned long fh;
int err;
net, sched: fix soft lockup in tc_classify commit 628185cfddf1dfb701c4efe2cfd72cf5b09f5702 upstream. Shahar reported a soft lockup in tc_classify(), where we run into an endless loop when walking the classifier chain due to tp->next == tp which is a state we should never run into. The issue only seems to trigger under load in the tc control path. What happens is that in tc_ctl_tfilter(), thread A allocates a new tp, initializes it, sets tp_created to 1, and calls into tp->ops->change() with it. In that classifier callback we had to unlock/lock the rtnl mutex and returned with -EAGAIN. One reason why we need to drop there is, for example, that we need to request an action module to be loaded. This happens via tcf_exts_validate() -> tcf_action_init/_1() meaning after we loaded and found the requested action, we need to redo the whole request so we don't race against others. While we had to unlock rtnl in that time, thread B's request was processed next on that CPU. Thread B added a new tp instance successfully to the classifier chain. When thread A returned grabbing the rtnl mutex again, propagating -EAGAIN and destroying its tp instance which never got linked, we goto replay and redo A's request. This time when walking the classifier chain in tc_ctl_tfilter() for checking for existing tp instances we had a priority match and found the tp instance that was created and linked by thread B. Now calling again into tp->ops->change() with that tp was successful and returned without error. tp_created was never cleared in the second round, thus kernel thinks that we need to link it into the classifier chain (once again). tp and *back point to the same object due to the match we had earlier on. Thus for thread B's already public tp, we reset tp->next to tp itself and link it into the chain, which eventually causes the mentioned endless loop in tc_classify() once a packet hits the data path. Fix is to clear tp_created at the beginning of each request, also when we replay it. On the paths that can cause -EAGAIN we already destroy the original tp instance we had and on replay we really need to start from scratch. It seems that this issue was first introduced in commit 12186be7d2e1 ("net_cls: fix unconfigured struct tcf_proto keeps chaining and avoid kernel panic when we use cls_cgroup"). Fixes: 12186be7d2e1 ("net_cls: fix unconfigured struct tcf_proto keeps chaining and avoid kernel panic when we use cls_cgroup") Reported-by: Shahar Klein <shahark@mellanox.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Cc: Cong Wang <xiyou.wangcong@gmail.com> Acked-by: Eric Dumazet <edumazet@google.com> Tested-by: Shahar Klein <shahark@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Willy Tarreau <w@1wt.eu>
2016-12-21 17:04:11 +00:00
int tp_created;
if ((n->nlmsg_type != RTM_GETTFILTER) && !netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
replay:
net, sched: fix soft lockup in tc_classify commit 628185cfddf1dfb701c4efe2cfd72cf5b09f5702 upstream. Shahar reported a soft lockup in tc_classify(), where we run into an endless loop when walking the classifier chain due to tp->next == tp which is a state we should never run into. The issue only seems to trigger under load in the tc control path. What happens is that in tc_ctl_tfilter(), thread A allocates a new tp, initializes it, sets tp_created to 1, and calls into tp->ops->change() with it. In that classifier callback we had to unlock/lock the rtnl mutex and returned with -EAGAIN. One reason why we need to drop there is, for example, that we need to request an action module to be loaded. This happens via tcf_exts_validate() -> tcf_action_init/_1() meaning after we loaded and found the requested action, we need to redo the whole request so we don't race against others. While we had to unlock rtnl in that time, thread B's request was processed next on that CPU. Thread B added a new tp instance successfully to the classifier chain. When thread A returned grabbing the rtnl mutex again, propagating -EAGAIN and destroying its tp instance which never got linked, we goto replay and redo A's request. This time when walking the classifier chain in tc_ctl_tfilter() for checking for existing tp instances we had a priority match and found the tp instance that was created and linked by thread B. Now calling again into tp->ops->change() with that tp was successful and returned without error. tp_created was never cleared in the second round, thus kernel thinks that we need to link it into the classifier chain (once again). tp and *back point to the same object due to the match we had earlier on. Thus for thread B's already public tp, we reset tp->next to tp itself and link it into the chain, which eventually causes the mentioned endless loop in tc_classify() once a packet hits the data path. Fix is to clear tp_created at the beginning of each request, also when we replay it. On the paths that can cause -EAGAIN we already destroy the original tp instance we had and on replay we really need to start from scratch. It seems that this issue was first introduced in commit 12186be7d2e1 ("net_cls: fix unconfigured struct tcf_proto keeps chaining and avoid kernel panic when we use cls_cgroup"). Fixes: 12186be7d2e1 ("net_cls: fix unconfigured struct tcf_proto keeps chaining and avoid kernel panic when we use cls_cgroup") Reported-by: Shahar Klein <shahark@mellanox.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Cc: Cong Wang <xiyou.wangcong@gmail.com> Acked-by: Eric Dumazet <edumazet@google.com> Tested-by: Shahar Klein <shahark@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Willy Tarreau <w@1wt.eu>
2016-12-21 17:04:11 +00:00
tp_created = 0;
err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL);
if (err < 0)
return err;
t = nlmsg_data(n);
protocol = TC_H_MIN(t->tcm_info);
prio = TC_H_MAJ(t->tcm_info);
nprio = prio;
parent = t->tcm_parent;
cl = 0;
if (prio == 0) {
/* If no priority is given, user wants we allocated it. */
if (n->nlmsg_type != RTM_NEWTFILTER ||
!(n->nlmsg_flags & NLM_F_CREATE))
return -ENOENT;
prio = TC_H_MAKE(0x80000000U, 0U);
}
/* Find head of filter chain. */
/* Find link */
dev = __dev_get_by_index(net, t->tcm_ifindex);
if (dev == NULL)
return -ENODEV;
/* Find qdisc */
if (!parent) {
q = dev->qdisc;
parent = q->handle;
} else {
q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent));
if (q == NULL)
return -EINVAL;
}
/* Is it classful? */
cops = q->ops->cl_ops;
if (!cops)
return -EINVAL;
if (cops->tcf_chain == NULL)
return -EOPNOTSUPP;
/* Do we search for filter, attached to class? */
if (TC_H_MIN(parent)) {
cl = cops->get(q, parent);
if (cl == 0)
return -ENOENT;
}
/* And the last stroke */
chain = cops->tcf_chain(q, cl);
err = -EINVAL;
if (chain == NULL)
goto errout;
/* Check the chain for existence of proto-tcf with this priority */
for (back = chain; (tp = *back) != NULL; back = &tp->next) {
if (tp->prio >= prio) {
if (tp->prio == prio) {
if (!nprio ||
(tp->protocol != protocol && protocol))
goto errout;
} else
tp = NULL;
break;
}
}
root_lock = qdisc_root_sleeping_lock(q);
if (tp == NULL) {
/* Proto-tcf does not exist, create new one */
if (tca[TCA_KIND] == NULL || !protocol)
goto errout;
err = -ENOENT;
if (n->nlmsg_type != RTM_NEWTFILTER ||
!(n->nlmsg_flags & NLM_F_CREATE))
goto errout;
/* Create new proto tcf */
err = -ENOBUFS;
tp = kzalloc(sizeof(*tp), GFP_KERNEL);
if (tp == NULL)
goto errout;
err = -ENOENT;
tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND]);
if (tp_ops == NULL) {
#ifdef CONFIG_MODULES
struct nlattr *kind = tca[TCA_KIND];
char name[IFNAMSIZ];
if (kind != NULL &&
nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
rtnl_unlock();
request_module("cls_%s", name);
rtnl_lock();
tp_ops = tcf_proto_lookup_ops(kind);
/* We dropped the RTNL semaphore in order to
* perform the module load. So, even if we
* succeeded in loading the module we have to
* replay the request. We indicate this using
* -EAGAIN.
*/
if (tp_ops != NULL) {
module_put(tp_ops->owner);
err = -EAGAIN;
}
}
#endif
kfree(tp);
goto errout;
}
tp->ops = tp_ops;
tp->protocol = protocol;
tp->prio = nprio ? : TC_H_MAJ(tcf_auto_prio(*back));
tp->q = q;
tp->classify = tp_ops->classify;
tp->classid = parent;
err = tp_ops->init(tp);
if (err != 0) {
module_put(tp_ops->owner);
kfree(tp);
goto errout;
}
tp_created = 1;
} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind))
goto errout;
fh = tp->ops->get(tp, t->tcm_handle);
if (fh == 0) {
if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
spin_lock_bh(root_lock);
*back = tp->next;
spin_unlock_bh(root_lock);
tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER);
tcf_destroy(tp);
err = 0;
goto errout;
}
err = -ENOENT;
if (n->nlmsg_type != RTM_NEWTFILTER ||
!(n->nlmsg_flags & NLM_F_CREATE))
goto errout;
} else {
switch (n->nlmsg_type) {
case RTM_NEWTFILTER:
err = -EEXIST;
if (n->nlmsg_flags & NLM_F_EXCL) {
if (tp_created)
tcf_destroy(tp);
goto errout;
}
break;
case RTM_DELTFILTER:
err = tp->ops->delete(tp, fh);
if (err == 0)
net sched filters: fix notification of filter delete with proper handle [ Upstream commit 9ee7837449b3d6f0fcf9132c6b5e5aaa58cc67d4 ] Daniel says: While trying out [1][2], I noticed that tc monitor doesn't show the correct handle on delete: $ tc monitor qdisc clsact ffff: dev eno1 parent ffff:fff1 filter dev eno1 ingress protocol all pref 49152 bpf handle 0x2a [...] deleted filter dev eno1 ingress protocol all pref 49152 bpf handle 0xf3be0c80 some context to explain the above: The user identity of any tc filter is represented by a 32-bit identifier encoded in tcm->tcm_handle. Example 0x2a in the bpf filter above. A user wishing to delete, get or even modify a specific filter uses this handle to reference it. Every classifier is free to provide its own semantics for the 32 bit handle. Example: classifiers like u32 use schemes like 800:1:801 to describe the semantics of their filters represented as hash table, bucket and node ids etc. Classifiers also have internal per-filter representation which is different from this externally visible identity. Most classifiers set this internal representation to be a pointer address (which allows fast retrieval of said filters in their implementations). This internal representation is referenced with the "fh" variable in the kernel control code. When a user successfuly deletes a specific filter, by specifying the correct tcm->tcm_handle, an event is generated to user space which indicates which specific filter was deleted. Before this patch, the "fh" value was sent to user space as the identity. As an example what is shown in the sample bpf filter delete event above is 0xf3be0c80. This is infact a 32-bit truncation of 0xffff8807f3be0c80 which happens to be a 64-bit memory address of the internal filter representation (address of the corresponding filter's struct cls_bpf_prog); After this patch the appropriate user identifiable handle as encoded in the originating request tcm->tcm_handle is generated in the event. One of the cardinal rules of netlink rules is to be able to take an event (such as a delete in this case) and reflect it back to the kernel and successfully delete the filter. This patch achieves that. Note, this issue has existed since the original TC action infrastructure code patch back in 2004 as found in: https://git.kernel.org/cgit/linux/kernel/git/history/history.git/commit/ [1] http://patchwork.ozlabs.org/patch/682828/ [2] http://patchwork.ozlabs.org/patch/682829/ Fixes: 4e54c4816bfe ("[NET]: Add tc extensions infrastructure.") Reported-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Cong Wang <xiyou.wangcong@gmail.com> Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com> Signed-off-by: David S. Miller <davem@davemloft.net> [bwh: Backported to 3.2: adjust context] Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
2016-10-25 00:18:27 +00:00
tfilter_notify(net, skb, n, tp,
t->tcm_handle, RTM_DELTFILTER);
goto errout;
case RTM_GETTFILTER:
err = tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER);
goto errout;
default:
err = -EINVAL;
goto errout;
}
}
err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh);
if (err == 0) {
if (tp_created) {
spin_lock_bh(root_lock);
tp->next = *back;
*back = tp;
spin_unlock_bh(root_lock);
}
tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER);
} else {
if (tp_created)
tcf_destroy(tp);
}
errout:
if (cl)
cops->put(q, cl);
if (err == -EAGAIN)
/* Replay the request. */
goto replay;
return err;
}
static int tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp,
unsigned long fh, u32 portid, u32 seq, u16 flags, int event)
{
struct tcmsg *tcm;
struct nlmsghdr *nlh;
unsigned char *b = skb_tail_pointer(skb);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
if (!nlh)
goto out_nlmsg_trim;
tcm = nlmsg_data(nlh);
tcm->tcm_family = AF_UNSPEC;
tcm->tcm__pad1 = 0;
tcm->tcm__pad2 = 0;
tcm->tcm_ifindex = qdisc_dev(tp->q)->ifindex;
tcm->tcm_parent = tp->classid;
tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
goto nla_put_failure;
tcm->tcm_handle = fh;
if (RTM_DELTFILTER != event) {
tcm->tcm_handle = 0;
if (tp->ops->dump && tp->ops->dump(tp, fh, skb, tcm) < 0)
goto nla_put_failure;
}
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
out_nlmsg_trim:
nla_put_failure:
nlmsg_trim(skb, b);
return -1;
}
static int tfilter_notify(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n, struct tcf_proto *tp,
unsigned long fh, int event)
{
struct sk_buff *skb;
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
return -ENOBUFS;
if (tcf_fill_node(skb, tp, fh, portid, n->nlmsg_seq, 0, event) <= 0) {
kfree_skb(skb);
return -EINVAL;
}
return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
}
struct tcf_dump_args {
struct tcf_walker w;
struct sk_buff *skb;
struct netlink_callback *cb;
};
static int tcf_node_dump(struct tcf_proto *tp, unsigned long n,
struct tcf_walker *arg)
{
struct tcf_dump_args *a = (void *)arg;
return tcf_fill_node(a->skb, tp, n, NETLINK_CB(a->cb->skb).portid,
a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER);
}
/* called with RTNL */
static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
int t;
int s_t;
struct net_device *dev;
struct Qdisc *q;
struct tcf_proto *tp, **chain;
struct tcmsg *tcm = nlmsg_data(cb->nlh);
unsigned long cl = 0;
const struct Qdisc_class_ops *cops;
struct tcf_dump_args arg;
if (nlmsg_len(cb->nlh) < sizeof(*tcm))
return skb->len;
dev = __dev_get_by_index(net, tcm->tcm_ifindex);
if (!dev)
return skb->len;
if (!tcm->tcm_parent)
q = dev->qdisc;
else
q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
if (!q)
goto out;
cops = q->ops->cl_ops;
if (!cops)
goto errout;
if (cops->tcf_chain == NULL)
goto errout;
if (TC_H_MIN(tcm->tcm_parent)) {
cl = cops->get(q, tcm->tcm_parent);
if (cl == 0)
goto errout;
}
chain = cops->tcf_chain(q, cl);
if (chain == NULL)
goto errout;
s_t = cb->args[0];
for (tp = *chain, t = 0; tp; tp = tp->next, t++) {
if (t < s_t)
continue;
if (TC_H_MAJ(tcm->tcm_info) &&
TC_H_MAJ(tcm->tcm_info) != tp->prio)
continue;
if (TC_H_MIN(tcm->tcm_info) &&
TC_H_MIN(tcm->tcm_info) != tp->protocol)
continue;
if (t > s_t)
memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
if (cb->args[1] == 0) {
if (tcf_fill_node(skb, tp, 0, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
RTM_NEWTFILTER) <= 0)
break;
cb->args[1] = 1;
}
if (tp->ops->walk == NULL)
continue;
arg.w.fn = tcf_node_dump;
arg.skb = skb;
arg.cb = cb;
arg.w.stop = 0;
arg.w.skip = cb->args[1] - 1;
arg.w.count = 0;
tp->ops->walk(tp, &arg.w);
cb->args[1] = arg.w.count + 1;
if (arg.w.stop)
break;
}
cb->args[0] = t;
errout:
if (cl)
cops->put(q, cl);
out:
return skb->len;
}
void tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
if (exts->action) {
tcf_action_destroy(exts->action, TCA_ACT_UNBIND);
exts->action = NULL;
}
#endif
}
EXPORT_SYMBOL(tcf_exts_destroy);
int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
struct nlattr *rate_tlv, struct tcf_exts *exts,
const struct tcf_ext_map *map)
{
memset(exts, 0, sizeof(*exts));
#ifdef CONFIG_NET_CLS_ACT
{
struct tc_action *act;
if (map->police && tb[map->police]) {
act = tcf_action_init_1(net, tb[map->police], rate_tlv,
"police", TCA_ACT_NOREPLACE,
TCA_ACT_BIND);
if (IS_ERR(act))
return PTR_ERR(act);
act->type = TCA_OLD_COMPAT;
exts->action = act;
} else if (map->action && tb[map->action]) {
act = tcf_action_init(net, tb[map->action], rate_tlv,
NULL, TCA_ACT_NOREPLACE,
TCA_ACT_BIND);
if (IS_ERR(act))
return PTR_ERR(act);
exts->action = act;
}
}
#else
if ((map->action && tb[map->action]) ||
(map->police && tb[map->police]))
return -EOPNOTSUPP;
#endif
return 0;
}
EXPORT_SYMBOL(tcf_exts_validate);
void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
struct tcf_exts *src)
{
#ifdef CONFIG_NET_CLS_ACT
if (src->action) {
struct tc_action *act;
tcf_tree_lock(tp);
act = dst->action;
dst->action = src->action;
tcf_tree_unlock(tp);
if (act)
tcf_action_destroy(act, TCA_ACT_UNBIND);
}
#endif
}
EXPORT_SYMBOL(tcf_exts_change);
int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts,
const struct tcf_ext_map *map)
{
#ifdef CONFIG_NET_CLS_ACT
if (map->action && exts->action) {
/*
* again for backward compatible mode - we want
* to work with both old and new modes of entering
* tc data even if iproute2 was newer - jhs
*/
struct nlattr *nest;
if (exts->action->type != TCA_OLD_COMPAT) {
nest = nla_nest_start(skb, map->action);
if (nest == NULL)
goto nla_put_failure;
if (tcf_action_dump(skb, exts->action, 0, 0) < 0)
goto nla_put_failure;
nla_nest_end(skb, nest);
} else if (map->police) {
nest = nla_nest_start(skb, map->police);
if (nest == NULL)
goto nla_put_failure;
if (tcf_action_dump_old(skb, exts->action, 0, 0) < 0)
goto nla_put_failure;
nla_nest_end(skb, nest);
}
}
#endif
return 0;
nla_put_failure: __attribute__ ((unused))
return -1;
}
EXPORT_SYMBOL(tcf_exts_dump);
int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts,
const struct tcf_ext_map *map)
{
#ifdef CONFIG_NET_CLS_ACT
if (exts->action)
if (tcf_action_copy_stats(skb, exts->action, 1) < 0)
goto nla_put_failure;
#endif
return 0;
nla_put_failure: __attribute__ ((unused))
return -1;
}
EXPORT_SYMBOL(tcf_exts_dump_stats);
static int __init tc_filter_init(void)
{
rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, NULL);
rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, NULL);
rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter,
tc_dump_tfilter, NULL);
return 0;
}
subsys_initcall(tc_filter_init);