[IPV6]: Multiple Routing Tables

Adds the framework to support multiple IPv6 routing tables.
Currently all automatically generated routes are put into the
same table. This could be changed at a later point, after
weighing the locking overhead that separate tables would add.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Thomas Graf 2006-08-04 23:20:06 -07:00 committed by David S. Miller
parent 5d0bbeeb14
commit c71099acce
6 changed files with 499 additions and 195 deletions

View file

@ -51,6 +51,8 @@ struct rt6key
int plen; int plen;
}; };
struct fib6_table;
struct rt6_info struct rt6_info
{ {
union { union {
@ -71,6 +73,7 @@ struct rt6_info
u32 rt6i_flags; u32 rt6i_flags;
u32 rt6i_metric; u32 rt6i_metric;
atomic_t rt6i_ref; atomic_t rt6i_ref;
struct fib6_table *rt6i_table;
struct rt6key rt6i_dst; struct rt6key rt6i_dst;
struct rt6key rt6i_src; struct rt6key rt6i_src;
@ -143,12 +146,43 @@ struct rt6_statistics {
typedef void (*f_pnode)(struct fib6_node *fn, void *); typedef void (*f_pnode)(struct fib6_node *fn, void *);
extern struct fib6_node ip6_routing_table; struct fib6_table {
struct hlist_node tb6_hlist;
u32 tb6_id;
rwlock_t tb6_lock;
struct fib6_node tb6_root;
};
/*
 * IPv6 routing table identifiers.
 *
 * The purpose-specific aliases (LOCAL, DFLT, INFO, PREFIX) all resolve to
 * RT6_TABLE_MAIN, so every route currently ends up in the main table.
 */
#define RT6_TABLE_UNSPEC RT_TABLE_UNSPEC
#define RT6_TABLE_MAIN RT_TABLE_MAIN
#define RT6_TABLE_LOCAL RT6_TABLE_MAIN
#define RT6_TABLE_DFLT RT6_TABLE_MAIN
#define RT6_TABLE_INFO RT6_TABLE_MAIN
#define RT6_TABLE_PREFIX RT6_TABLE_MAIN
/*
 * Inclusive range of table ids. Without CONFIG_IPV6_MULTIPLE_TABLES the
 * range collapses to the single id RT_TABLE_MAIN.
 */
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
#define FIB6_TABLE_MIN 1
#define FIB6_TABLE_MAX RT_TABLE_MAX
#else
#define FIB6_TABLE_MIN RT_TABLE_MAIN
#define FIB6_TABLE_MAX FIB6_TABLE_MIN
#endif
/*
 * Flag bits carried in the `int` argument of a pol_lookup_t callback.
 * RT6_F_STRICT is set by callers for multicast/link-local destinations.
 * NOTE(review): RT6_F_HAS_SADDR has no visible user here - confirm intent.
 */
#define RT6_F_STRICT 1
#define RT6_F_HAS_SADDR 2
/* Per-table lookup callback: (table, flow, RT6_F_* flags) -> route entry. */
typedef struct rt6_info *(*pol_lookup_t)(struct fib6_table *,
struct flowi *, int);
/* /*
* exported functions * exported functions
*/ */
/* Find the table with the given id; an id of 0 is treated as RT6_TABLE_MAIN
 * when multiple tables are enabled. May return NULL if no such table exists. */
extern struct fib6_table * fib6_get_table(u32 id);
/* Like fib6_get_table(), but with multiple tables enabled a missing table is
 * allocated and registered. Returns NULL if that allocation fails. */
extern struct fib6_table * fib6_new_table(u32 id);
/* Run the supplied lookup callback for the flow and return its result as a
 * dst_entry. Rule selection is not implemented yet: the main table is used. */
extern struct dst_entry * fib6_rule_lookup(struct flowi *fl, int flags,
pol_lookup_t lookup);
extern struct fib6_node *fib6_lookup(struct fib6_node *root, extern struct fib6_node *fib6_lookup(struct fib6_node *root,
struct in6_addr *daddr, struct in6_addr *daddr,
struct in6_addr *saddr); struct in6_addr *saddr);
@ -161,6 +195,9 @@ extern void fib6_clean_tree(struct fib6_node *root,
int (*func)(struct rt6_info *, void *arg), int (*func)(struct rt6_info *, void *arg),
int prune, void *arg); int prune, void *arg);
/* Apply fib6_clean_tree() with func/prune/arg to every existing table in
 * FIB6_TABLE_MIN..FIB6_TABLE_MAX; each table's write lock is taken internally,
 * so the caller must not already hold it. */
extern void fib6_clean_all(int (*func)(struct rt6_info *, void *arg),
int prune, void *arg);
extern int fib6_walk(struct fib6_walker_t *w); extern int fib6_walk(struct fib6_walker_t *w);
extern int fib6_walk_continue(struct fib6_walker_t *w); extern int fib6_walk_continue(struct fib6_walker_t *w);

View file

@ -58,7 +58,8 @@ extern int ipv6_route_ioctl(unsigned int cmd, void __user *arg);
extern int ip6_route_add(struct in6_rtmsg *rtmsg, extern int ip6_route_add(struct in6_rtmsg *rtmsg,
struct nlmsghdr *, struct nlmsghdr *,
void *rtattr, void *rtattr,
struct netlink_skb_parms *req); struct netlink_skb_parms *req,
u32 table_id);
extern int ip6_ins_rt(struct rt6_info *, extern int ip6_ins_rt(struct rt6_info *,
struct nlmsghdr *, struct nlmsghdr *,
void *rtattr, void *rtattr,

View file

@ -136,3 +136,9 @@ config IPV6_TUNNEL
If unsure, say N. If unsure, say N.
config IPV6_MULTIPLE_TABLES
bool "IPv6: Multiple Routing Tables"
depends on IPV6 && EXPERIMENTAL
---help---
Support multiple routing tables.

View file

@ -1525,7 +1525,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
if (dev->type == ARPHRD_SIT && (dev->flags&IFF_POINTOPOINT)) if (dev->type == ARPHRD_SIT && (dev->flags&IFF_POINTOPOINT))
rtmsg.rtmsg_flags |= RTF_NONEXTHOP; rtmsg.rtmsg_flags |= RTF_NONEXTHOP;
ip6_route_add(&rtmsg, NULL, NULL, NULL); ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_PREFIX);
} }
/* Create "default" multicast route to the interface */ /* Create "default" multicast route to the interface */
@ -1542,7 +1542,7 @@ static void addrconf_add_mroute(struct net_device *dev)
rtmsg.rtmsg_ifindex = dev->ifindex; rtmsg.rtmsg_ifindex = dev->ifindex;
rtmsg.rtmsg_flags = RTF_UP; rtmsg.rtmsg_flags = RTF_UP;
rtmsg.rtmsg_type = RTMSG_NEWROUTE; rtmsg.rtmsg_type = RTMSG_NEWROUTE;
ip6_route_add(&rtmsg, NULL, NULL, NULL); ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_LOCAL);
} }
static void sit_route_add(struct net_device *dev) static void sit_route_add(struct net_device *dev)
@ -1559,7 +1559,7 @@ static void sit_route_add(struct net_device *dev)
rtmsg.rtmsg_flags = RTF_UP|RTF_NONEXTHOP; rtmsg.rtmsg_flags = RTF_UP|RTF_NONEXTHOP;
rtmsg.rtmsg_ifindex = dev->ifindex; rtmsg.rtmsg_ifindex = dev->ifindex;
ip6_route_add(&rtmsg, NULL, NULL, NULL); ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_MAIN);
} }
static void addrconf_add_lroute(struct net_device *dev) static void addrconf_add_lroute(struct net_device *dev)

View file

@ -26,6 +26,7 @@
#include <linux/netdevice.h> #include <linux/netdevice.h>
#include <linux/in6.h> #include <linux/in6.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/list.h>
#ifdef CONFIG_PROC_FS #ifdef CONFIG_PROC_FS
#include <linux/proc_fs.h> #include <linux/proc_fs.h>
@ -147,6 +148,126 @@ static __inline__ void rt6_release(struct rt6_info *rt)
dst_free(&rt->u.dst); dst_free(&rt->u.dst);
} }
/*
 * The main routing table. It is statically allocated, so it exists whether
 * or not CONFIG_IPV6_MULTIPLE_TABLES is enabled. Its root node starts out
 * pointing at ip6_null_entry.
 */
static struct fib6_table fib6_main_tbl = {
	.tb6_root	= {
		.fn_flags	= RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
		.leaf		= &ip6_null_entry,
	},
	.tb6_lock	= RW_LOCK_UNLOCKED,
	.tb6_id		= RT6_TABLE_MAIN,
};
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
#define FIB_TABLE_HASHSZ 256
static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
/*
 * Allocate and initialise an empty routing table carrying the given id.
 *
 * The memory is obtained with GFP_ATOMIC and zeroed. The root node starts
 * out pointing at ip6_null_entry and carries the root flags. The table is
 * NOT linked into fib_table_hash here; that is the caller's job.
 *
 * Returns the new table, or NULL if the allocation failed.
 */
static struct fib6_table *fib6_alloc_table(u32 id)
{
	struct fib6_table *table;

	table = kzalloc(sizeof(*table), GFP_ATOMIC);
	if (table == NULL)
		return NULL;

	table->tb6_id = id;
	/*
	 * A dynamically allocated lock must be set up with rwlock_init();
	 * assigning RW_LOCK_UNLOCKED at run time is deprecated and prevents
	 * the lock debugging code from tracking this lock on its own.
	 */
	rwlock_init(&table->tb6_lock);
	table->tb6_root.leaf = &ip6_null_entry;
	table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;

	return table;
}
/*
 * Publish a table in fib_table_hash, in the bucket selected by the low
 * bits of its id.
 */
static void fib6_link_table(struct fib6_table *tb)
{
	struct hlist_head *bucket;

	bucket = &fib_table_hash[tb->tb6_id & (FIB_TABLE_HASHSZ - 1)];

	/*
	 * No protection necessary: this is the only operation that mutates
	 * the list, and tables never disappear once they exist.
	 */
	hlist_add_head_rcu(&tb->tb6_hlist, bucket);
}
/*
 * Return the table with the given id, creating and registering it first
 * if it does not exist yet. An id of 0 selects RT6_TABLE_MAIN.
 *
 * Returns NULL only when a new table was needed and could not be allocated.
 */
struct fib6_table *fib6_new_table(u32 id)
{
	struct fib6_table *tb;

	if (!id)
		id = RT6_TABLE_MAIN;

	tb = fib6_get_table(id);
	if (tb == NULL) {
		tb = fib6_alloc_table(id);
		if (tb)
			fib6_link_table(tb);
	}

	return tb;
}
/*
 * Look up a registered table by id. An id of 0 selects RT6_TABLE_MAIN.
 *
 * The hash bucket is walked under rcu_read_lock(); the pointer is returned
 * after the read-side section has ended.
 *
 * Returns the table, or NULL if no table with that id is registered.
 */
struct fib6_table *fib6_get_table(u32 id)
{
	struct fib6_table *tb, *found = NULL;
	struct hlist_head *bucket;
	struct hlist_node *node;

	if (!id)
		id = RT6_TABLE_MAIN;
	bucket = &fib_table_hash[id & (FIB_TABLE_HASHSZ - 1)];

	rcu_read_lock();
	hlist_for_each_entry_rcu(tb, node, bucket, tb6_hlist) {
		if (tb->tb6_id == id) {
			found = tb;
			break;
		}
	}
	rcu_read_unlock();

	return found;
}
/*
 * Resolve a flow to a route by invoking the given per-table lookup
 * callback. Policy rules are not implemented yet, so the callback is
 * always run against whatever fib6_get_table(RT6_TABLE_MAIN) returns.
 */
struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags,
				   pol_lookup_t lookup)
{
	/*
	 * TODO: Add rule lookup
	 */
	struct fib6_table *main_tbl = fib6_get_table(RT6_TABLE_MAIN);
	struct rt6_info *rt = lookup(main_tbl, fl, flags);

	return (struct dst_entry *) rt;
}
/* Init-time setup for the multiple-tables build: link the statically
 * allocated main table into fib_table_hash so fib6_get_table() can find it. */
static void __init fib6_tables_init(void)
{
fib6_link_table(&fib6_main_tbl);
}
#else
/* Single-table build: nothing is ever created, so this simply forwards
 * to fib6_get_table(). */
struct fib6_table *fib6_new_table(u32 id)
{
return fib6_get_table(id);
}
/* Single-table build: the id is ignored and the main table is always
 * returned, so the result is never NULL. */
struct fib6_table *fib6_get_table(u32 id)
{
return &fib6_main_tbl;
}
/*
 * Single-table build: run the lookup callback directly against the main
 * table and hand its result back as a dst_entry.
 */
struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags,
				   pol_lookup_t lookup)
{
	struct rt6_info *rt = lookup(&fib6_main_tbl, fl, flags);

	return (struct dst_entry *) rt;
}
/* Single-table build: there is no table hash to populate, so this is
 * intentionally empty. */
static void __init fib6_tables_init(void)
{
}
#endif
/* /*
* Routing Table * Routing Table
@ -1064,6 +1185,22 @@ void fib6_clean_tree(struct fib6_node *root,
fib6_walk(&c.w); fib6_walk(&c.w);
} }
/*
 * Run fib6_clean_tree() with the given callback, prune flag and argument
 * over every table that exists in the id range
 * FIB6_TABLE_MIN..FIB6_TABLE_MAX (inclusive).
 *
 * Each table is processed with its own tb6_lock write-held (BH disabled);
 * the lock is taken and dropped here, one table at a time.
 */
void fib6_clean_all(int (*func)(struct rt6_info *, void *arg),
		    int prune, void *arg)
{
	int id;

	for (id = FIB6_TABLE_MIN; id <= FIB6_TABLE_MAX; id++) {
		struct fib6_table *tbl = fib6_get_table(id);

		/* Ids without a registered table are skipped. */
		if (tbl == NULL)
			continue;

		write_lock_bh(&tbl->tb6_lock);
		fib6_clean_tree(&tbl->tb6_root, func, prune, arg);
		write_unlock_bh(&tbl->tb6_lock);
	}
}
static int fib6_prune_clone(struct rt6_info *rt, void *arg) static int fib6_prune_clone(struct rt6_info *rt, void *arg)
{ {
if (rt->rt6i_flags & RTF_CACHE) { if (rt->rt6i_flags & RTF_CACHE) {
@ -1142,11 +1279,8 @@ void fib6_run_gc(unsigned long dummy)
} }
gc_args.more = 0; gc_args.more = 0;
write_lock_bh(&rt6_lock);
ndisc_dst_gc(&gc_args.more); ndisc_dst_gc(&gc_args.more);
fib6_clean_tree(&ip6_routing_table, fib6_age, 0, NULL); fib6_clean_all(fib6_age, 0, NULL);
write_unlock_bh(&rt6_lock);
if (gc_args.more) if (gc_args.more)
mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval); mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
@ -1165,6 +1299,8 @@ void __init fib6_init(void)
NULL, NULL); NULL, NULL);
if (!fib6_node_kmem) if (!fib6_node_kmem)
panic("cannot create fib6_nodes cache"); panic("cannot create fib6_nodes cache");
fib6_tables_init();
} }
void fib6_gc_cleanup(void) void fib6_gc_cleanup(void)

View file

@ -140,16 +140,6 @@ struct rt6_info ip6_null_entry = {
.rt6i_ref = ATOMIC_INIT(1), .rt6i_ref = ATOMIC_INIT(1),
}; };
struct fib6_node ip6_routing_table = {
.leaf = &ip6_null_entry,
.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
};
/* Protects all the ip6 fib */
DEFINE_RWLOCK(rt6_lock);
/* allocate dst with ip6_dst_ops */ /* allocate dst with ip6_dst_ops */
static __inline__ struct rt6_info *ip6_dst_alloc(void) static __inline__ struct rt6_info *ip6_dst_alloc(void)
{ {
@ -188,8 +178,14 @@ static __inline__ int rt6_check_expired(const struct rt6_info *rt)
time_after(jiffies, rt->rt6i_expires)); time_after(jiffies, rt->rt6i_expires));
} }
/*
 * Returns non-zero when the destination address is multicast or
 * link-local; callers use this to request a strict (RT6_F_STRICT) lookup.
 */
static inline int rt6_need_strict(struct in6_addr *daddr)
{
	const int strict_types = IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL;

	return ipv6_addr_type(daddr) & strict_types;
}
/* /*
* Route lookup. Any rt6_lock is implied. * Route lookup. Any table->tb6_lock is implied.
*/ */
static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt, static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
@ -441,27 +437,66 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
} }
#endif #endif
struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, #define BACKTRACK() \
int oif, int strict) if (rt == &ip6_null_entry && flags & RT6_F_STRICT) { \
while ((fn = fn->parent) != NULL) { \
if (fn->fn_flags & RTN_TL_ROOT) { \
dst_hold(&rt->u.dst); \
goto out; \
} \
if (fn->fn_flags & RTN_RTINFO) \
goto restart; \
} \
}
static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
struct flowi *fl, int flags)
{ {
struct fib6_node *fn; struct fib6_node *fn;
struct rt6_info *rt; struct rt6_info *rt;
read_lock_bh(&rt6_lock); read_lock_bh(&table->tb6_lock);
fn = fib6_lookup(&ip6_routing_table, daddr, saddr); fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
rt = rt6_device_match(fn->leaf, oif, strict); restart:
rt = fn->leaf;
rt = rt6_device_match(rt, fl->oif, flags & RT6_F_STRICT);
BACKTRACK();
dst_hold(&rt->u.dst); dst_hold(&rt->u.dst);
rt->u.dst.__use++; out:
read_unlock_bh(&rt6_lock); read_unlock_bh(&table->tb6_lock);
rt->u.dst.lastuse = jiffies; rt->u.dst.lastuse = jiffies;
if (rt->u.dst.error == 0) rt->u.dst.__use++;
return rt;
dst_release(&rt->u.dst); return rt;
}
struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
int oif, int strict)
{
struct flowi fl = {
.oif = oif,
.nl_u = {
.ip6_u = {
.daddr = *daddr,
/* TODO: saddr */
},
},
};
struct dst_entry *dst;
int flags = strict ? RT6_F_STRICT : 0;
dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
if (dst->error == 0)
return (struct rt6_info *) dst;
dst_release(dst);
return NULL; return NULL;
} }
/* ip6_ins_rt is called with FREE rt6_lock. /* ip6_ins_rt is called with FREE table->tb6_lock.
It takes new route entry, the addition fails by any reason the It takes new route entry, the addition fails by any reason the
route is freed. In any case, if caller does not hold it, it may route is freed. In any case, if caller does not hold it, it may
be destroyed. be destroyed.
@ -471,10 +506,12 @@ int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
void *_rtattr, struct netlink_skb_parms *req) void *_rtattr, struct netlink_skb_parms *req)
{ {
int err; int err;
struct fib6_table *table;
write_lock_bh(&rt6_lock); table = rt->rt6i_table;
err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req); write_lock_bh(&table->tb6_lock);
write_unlock_bh(&rt6_lock); err = fib6_add(&table->tb6_root, rt, nlh, _rtattr, req);
write_unlock_bh(&table->tb6_lock);
return err; return err;
} }
@ -532,116 +569,34 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d
return rt; return rt;
} }
#define BACKTRACK() \ struct rt6_info *ip6_pol_route_input(struct fib6_table *table, struct flowi *fl,
if (rt == &ip6_null_entry) { \ int flags)
while ((fn = fn->parent) != NULL) { \
if (fn->fn_flags & RTN_ROOT) { \
goto out; \
} \
if (fn->fn_flags & RTN_RTINFO) \
goto restart; \
} \
}
void ip6_route_input(struct sk_buff *skb)
{ {
struct fib6_node *fn; struct fib6_node *fn;
struct rt6_info *rt, *nrt; struct rt6_info *rt, *nrt;
int strict; int strict = 0;
int attempts = 3; int attempts = 3;
int err; int err;
int reachable = RT6_SELECT_F_REACHABLE; int reachable = RT6_SELECT_F_REACHABLE;
strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0; if (flags & RT6_F_STRICT)
strict = RT6_SELECT_F_IFACE;
relookup: relookup:
read_lock_bh(&rt6_lock); read_lock_bh(&table->tb6_lock);
restart_2: restart_2:
fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr, fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
&skb->nh.ipv6h->saddr);
restart: restart:
rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | reachable); rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
BACKTRACK(); BACKTRACK();
if (rt == &ip6_null_entry || if (rt == &ip6_null_entry ||
rt->rt6i_flags & RTF_CACHE) rt->rt6i_flags & RTF_CACHE)
goto out; goto out;
dst_hold(&rt->u.dst); dst_hold(&rt->u.dst);
read_unlock_bh(&rt6_lock); read_unlock_bh(&table->tb6_lock);
if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr);
else {
#if CLONE_OFFLINK_ROUTE
nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr);
#else
goto out2;
#endif
}
dst_release(&rt->u.dst);
rt = nrt ? : &ip6_null_entry;
dst_hold(&rt->u.dst);
if (nrt) {
err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb));
if (!err)
goto out2;
}
if (--attempts <= 0)
goto out2;
/*
* Race condition! In the gap, when rt6_lock was
* released someone could insert this route. Relookup.
*/
dst_release(&rt->u.dst);
goto relookup;
out:
if (reachable) {
reachable = 0;
goto restart_2;
}
dst_hold(&rt->u.dst);
read_unlock_bh(&rt6_lock);
out2:
rt->u.dst.lastuse = jiffies;
rt->u.dst.__use++;
skb->dst = (struct dst_entry *) rt;
return;
}
struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
{
struct fib6_node *fn;
struct rt6_info *rt, *nrt;
int strict;
int attempts = 3;
int err;
int reachable = RT6_SELECT_F_REACHABLE;
strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
relookup:
read_lock_bh(&rt6_lock);
restart_2:
fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
restart:
rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
BACKTRACK();
if (rt == &ip6_null_entry ||
rt->rt6i_flags & RTF_CACHE)
goto out;
dst_hold(&rt->u.dst);
read_unlock_bh(&rt6_lock);
if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
@ -667,7 +622,7 @@ restart:
goto out2; goto out2;
/* /*
* Race condition! In the gap, when rt6_lock was * Race condition! In the gap, when table->tb6_lock was
* released someone could insert this route. Relookup. * released someone could insert this route. Relookup.
*/ */
dst_release(&rt->u.dst); dst_release(&rt->u.dst);
@ -679,11 +634,116 @@ out:
goto restart_2; goto restart_2;
} }
dst_hold(&rt->u.dst); dst_hold(&rt->u.dst);
read_unlock_bh(&rt6_lock); read_unlock_bh(&table->tb6_lock);
out2: out2:
rt->u.dst.lastuse = jiffies; rt->u.dst.lastuse = jiffies;
rt->u.dst.__use++; rt->u.dst.__use++;
return &rt->u.dst;
return rt;
}
/*
 * Route an incoming packet.
 *
 * Builds a flow key from the packet's IPv6 header (addresses, flow label,
 * next header) and its receiving interface, performs the input lookup
 * through fib6_rule_lookup(), and stores the result in skb->dst.
 */
void ip6_route_input(struct sk_buff *skb)
{
	struct ipv6hdr *iph = skb->nh.ipv6h;
	/* Multicast and link-local destinations need a strict lookup. */
	int flags = rt6_need_strict(&iph->daddr) ? RT6_F_STRICT : 0;
	struct flowi fl = {
		.iif = skb->dev->ifindex,
		.nl_u = {
			.ip6_u = {
				.daddr = iph->daddr,
				.saddr = iph->saddr,
				/* first 32-bit word of the header, masked */
				.flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK,
			},
		},
		.proto = iph->nexthdr,
	};

	skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
}
/*
 * Output-path route lookup for one table (a pol_lookup_t callback).
 *
 * Looks up fl's destination/source in the table, selecting on fl->oif.
 * If the selected route is neither ip6_null_entry nor an RTF_CACHE entry,
 * a per-destination copy is allocated and inserted with ip6_ins_rt().
 *
 * Every exit path takes a dst_hold() on the returned route and updates
 * its lastuse/__use counters. Never returns NULL: ip6_null_entry is used
 * when nothing else is available.
 */
static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
struct flowi *fl, int flags)
{
struct fib6_node *fn;
struct rt6_info *rt, *nrt;
int strict = 0;
/* Upper bound on how many times a failed insert triggers a relookup. */
int attempts = 3;
int err;
/* The first pass asks rt6_select() for reachable routers only. */
int reachable = RT6_SELECT_F_REACHABLE;
/* Translate the generic strict flag into rt6_select()'s interface flag. */
if (flags & RT6_F_STRICT)
strict = RT6_SELECT_F_IFACE;
relookup:
read_lock_bh(&table->tb6_lock);
restart_2:
fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
restart:
rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
/*
 * BACKTRACK() may jump to `restart` (retry at a parent node) or to `out`.
 * NOTE(review): the BACKTRACK() visible in this change calls dst_hold()
 * before `goto out`, and `out:` below holds again; rt is ip6_null_entry
 * on that path - confirm the extra reference is intended.
 */
BACKTRACK();
if (rt == &ip6_null_entry ||
rt->rt6i_flags & RTF_CACHE)
goto out;
/* Pin rt before dropping the table lock; the copy is made unlocked. */
dst_hold(&rt->u.dst);
read_unlock_bh(&table->tb6_lock);
if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
else {
#if CLONE_OFFLINK_ROUTE
nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
#else
/* No clone wanted: return rt itself, still held from above. */
goto out2;
#endif
}
/* Swap the reference from the original route to the copy (or to
 * ip6_null_entry if the allocation failed). */
dst_release(&rt->u.dst);
rt = nrt ? : &ip6_null_entry;
dst_hold(&rt->u.dst);
if (nrt) {
/* ip6_ins_rt() takes the table write lock itself. */
err = ip6_ins_rt(nrt, NULL, NULL, NULL);
if (!err)
goto out2;
}
if (--attempts <= 0)
goto out2;
/*
 * Race condition! In the gap, when table->tb6_lock was
 * released someone could insert this route. Relookup.
 */
dst_release(&rt->u.dst);
goto relookup;
out:
/* On the first pass, drop the reachability requirement and redo the
 * lookup once, still under the read lock. */
if (reachable) {
reachable = 0;
goto restart_2;
}
dst_hold(&rt->u.dst);
read_unlock_bh(&table->tb6_lock);
out2:
rt->u.dst.lastuse = jiffies;
rt->u.dst.__use++;
return rt;
}
struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
{
int flags = 0;
if (rt6_need_strict(&fl->fl6_dst))
flags |= RT6_F_STRICT;
return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
} }
@ -906,7 +966,8 @@ int ipv6_get_hoplimit(struct net_device *dev)
*/ */
int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
void *_rtattr, struct netlink_skb_parms *req) void *_rtattr, struct netlink_skb_parms *req,
u32 table_id)
{ {
int err; int err;
struct rtmsg *r; struct rtmsg *r;
@ -914,6 +975,7 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
struct rt6_info *rt = NULL; struct rt6_info *rt = NULL;
struct net_device *dev = NULL; struct net_device *dev = NULL;
struct inet6_dev *idev = NULL; struct inet6_dev *idev = NULL;
struct fib6_table *table;
int addr_type; int addr_type;
rta = (struct rtattr **) _rtattr; rta = (struct rtattr **) _rtattr;
@ -937,6 +999,12 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
if (rtmsg->rtmsg_metric == 0) if (rtmsg->rtmsg_metric == 0)
rtmsg->rtmsg_metric = IP6_RT_PRIO_USER; rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
table = fib6_new_table(table_id);
if (table == NULL) {
err = -ENOBUFS;
goto out;
}
rt = ip6_dst_alloc(); rt = ip6_dst_alloc();
if (rt == NULL) { if (rt == NULL) {
@ -1093,6 +1161,7 @@ install_route:
rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
rt->u.dst.dev = dev; rt->u.dst.dev = dev;
rt->rt6i_idev = idev; rt->rt6i_idev = idev;
rt->rt6i_table = table;
return ip6_ins_rt(rt, nlh, _rtattr, req); return ip6_ins_rt(rt, nlh, _rtattr, req);
out: out:
@ -1108,26 +1177,35 @@ out:
int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req) int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
{ {
int err; int err;
struct fib6_table *table;
write_lock_bh(&rt6_lock); table = rt->rt6i_table;
write_lock_bh(&table->tb6_lock);
err = fib6_del(rt, nlh, _rtattr, req); err = fib6_del(rt, nlh, _rtattr, req);
dst_release(&rt->u.dst); dst_release(&rt->u.dst);
write_unlock_bh(&rt6_lock); write_unlock_bh(&table->tb6_lock);
return err; return err;
} }
static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req) static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
void *_rtattr, struct netlink_skb_parms *req,
u32 table_id)
{ {
struct fib6_table *table;
struct fib6_node *fn; struct fib6_node *fn;
struct rt6_info *rt; struct rt6_info *rt;
int err = -ESRCH; int err = -ESRCH;
read_lock_bh(&rt6_lock); table = fib6_get_table(table_id);
if (table == NULL)
return err;
fn = fib6_locate(&ip6_routing_table, read_lock_bh(&table->tb6_lock);
fn = fib6_locate(&table->tb6_root,
&rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len, &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
&rtmsg->rtmsg_src, rtmsg->rtmsg_src_len); &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
@ -1144,12 +1222,12 @@ static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_r
rtmsg->rtmsg_metric != rt->rt6i_metric) rtmsg->rtmsg_metric != rt->rt6i_metric)
continue; continue;
dst_hold(&rt->u.dst); dst_hold(&rt->u.dst);
read_unlock_bh(&rt6_lock); read_unlock_bh(&table->tb6_lock);
return ip6_del_rt(rt, nlh, _rtattr, req); return ip6_del_rt(rt, nlh, _rtattr, req);
} }
} }
read_unlock_bh(&rt6_lock); read_unlock_bh(&table->tb6_lock);
return err; return err;
} }
@ -1161,10 +1239,15 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
struct neighbour *neigh, u8 *lladdr, int on_link) struct neighbour *neigh, u8 *lladdr, int on_link)
{ {
struct rt6_info *rt, *nrt = NULL; struct rt6_info *rt, *nrt = NULL;
int strict;
struct fib6_node *fn; struct fib6_node *fn;
struct fib6_table *table;
struct netevent_redirect netevent; struct netevent_redirect netevent;
/* TODO: Very lazy, might need to check all tables */
table = fib6_get_table(RT6_TABLE_MAIN);
if (table == NULL)
return;
/* /*
* Get the "current" route for this destination and * Get the "current" route for this destination and
* check if the redirect has come from appropriate router. * check if the redirect has come from appropriate router.
@ -1175,10 +1258,9 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
* is a bit fuzzy and one might need to check all possible * is a bit fuzzy and one might need to check all possible
* routes. * routes.
*/ */
strict = ipv6_addr_type(dest) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL);
read_lock_bh(&rt6_lock); read_lock_bh(&table->tb6_lock);
fn = fib6_lookup(&ip6_routing_table, dest, NULL); fn = fib6_lookup(&table->tb6_root, dest, NULL);
restart: restart:
for (rt = fn->leaf; rt; rt = rt->u.next) { for (rt = fn->leaf; rt; rt = rt->u.next) {
/* /*
@ -1201,7 +1283,7 @@ restart:
} }
if (rt) if (rt)
dst_hold(&rt->u.dst); dst_hold(&rt->u.dst);
else if (strict) { else if (rt6_need_strict(dest)) {
while ((fn = fn->parent) != NULL) { while ((fn = fn->parent) != NULL) {
if (fn->fn_flags & RTN_ROOT) if (fn->fn_flags & RTN_ROOT)
break; break;
@ -1209,7 +1291,7 @@ restart:
goto restart; goto restart;
} }
} }
read_unlock_bh(&rt6_lock); read_unlock_bh(&table->tb6_lock);
if (!rt) { if (!rt) {
if (net_ratelimit()) if (net_ratelimit())
@ -1384,6 +1466,7 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
#ifdef CONFIG_IPV6_SUBTREES #ifdef CONFIG_IPV6_SUBTREES
memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif #endif
rt->rt6i_table = ort->rt6i_table;
} }
return rt; return rt;
} }
@ -1394,9 +1477,14 @@ static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixle
{ {
struct fib6_node *fn; struct fib6_node *fn;
struct rt6_info *rt = NULL; struct rt6_info *rt = NULL;
struct fib6_table *table;
write_lock_bh(&rt6_lock); table = fib6_get_table(RT6_TABLE_INFO);
fn = fib6_locate(&ip6_routing_table, prefix ,prefixlen, NULL, 0); if (table == NULL)
return NULL;
write_lock_bh(&table->tb6_lock);
fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
if (!fn) if (!fn)
goto out; goto out;
@ -1411,7 +1499,7 @@ static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixle
break; break;
} }
out: out:
write_unlock_bh(&rt6_lock); write_unlock_bh(&table->tb6_lock);
return rt; return rt;
} }
@ -1433,7 +1521,7 @@ static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixle
rtmsg.rtmsg_flags |= RTF_DEFAULT; rtmsg.rtmsg_flags |= RTF_DEFAULT;
rtmsg.rtmsg_ifindex = ifindex; rtmsg.rtmsg_ifindex = ifindex;
ip6_route_add(&rtmsg, NULL, NULL, NULL); ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_INFO);
return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex); return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
} }
@ -1442,12 +1530,14 @@ static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixle
struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev) struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
{ {
struct rt6_info *rt; struct rt6_info *rt;
struct fib6_node *fn; struct fib6_table *table;
fn = &ip6_routing_table; table = fib6_get_table(RT6_TABLE_DFLT);
if (table == NULL)
return NULL;
write_lock_bh(&rt6_lock); write_lock_bh(&table->tb6_lock);
for (rt = fn->leaf; rt; rt=rt->u.next) { for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) {
if (dev == rt->rt6i_dev && if (dev == rt->rt6i_dev &&
((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
ipv6_addr_equal(&rt->rt6i_gateway, addr)) ipv6_addr_equal(&rt->rt6i_gateway, addr))
@ -1455,7 +1545,7 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d
} }
if (rt) if (rt)
dst_hold(&rt->u.dst); dst_hold(&rt->u.dst);
write_unlock_bh(&rt6_lock); write_unlock_bh(&table->tb6_lock);
return rt; return rt;
} }
@ -1474,28 +1564,31 @@ struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
rtmsg.rtmsg_ifindex = dev->ifindex; rtmsg.rtmsg_ifindex = dev->ifindex;
ip6_route_add(&rtmsg, NULL, NULL, NULL); ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_DFLT);
return rt6_get_dflt_router(gwaddr, dev); return rt6_get_dflt_router(gwaddr, dev);
} }
void rt6_purge_dflt_routers(void) void rt6_purge_dflt_routers(void)
{ {
struct rt6_info *rt; struct rt6_info *rt;
struct fib6_table *table;
/* NOTE: Keep consistent with rt6_get_dflt_router */
table = fib6_get_table(RT6_TABLE_DFLT);
if (table == NULL)
return;
restart: restart:
read_lock_bh(&rt6_lock); read_lock_bh(&table->tb6_lock);
for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) { for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) {
if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) { if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
dst_hold(&rt->u.dst); dst_hold(&rt->u.dst);
read_unlock_bh(&table->tb6_lock);
read_unlock_bh(&rt6_lock);
ip6_del_rt(rt, NULL, NULL, NULL); ip6_del_rt(rt, NULL, NULL, NULL);
goto restart; goto restart;
} }
} }
read_unlock_bh(&rt6_lock); read_unlock_bh(&table->tb6_lock);
} }
int ipv6_route_ioctl(unsigned int cmd, void __user *arg) int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
@ -1516,10 +1609,12 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
rtnl_lock(); rtnl_lock();
switch (cmd) { switch (cmd) {
case SIOCADDRT: case SIOCADDRT:
err = ip6_route_add(&rtmsg, NULL, NULL, NULL); err = ip6_route_add(&rtmsg, NULL, NULL, NULL,
RT6_TABLE_MAIN);
break; break;
case SIOCDELRT: case SIOCDELRT:
err = ip6_route_del(&rtmsg, NULL, NULL, NULL); err = ip6_route_del(&rtmsg, NULL, NULL, NULL,
RT6_TABLE_MAIN);
break; break;
default: default:
err = -EINVAL; err = -EINVAL;
@ -1593,6 +1688,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
ipv6_addr_copy(&rt->rt6i_dst.addr, addr); ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
rt->rt6i_dst.plen = 128; rt->rt6i_dst.plen = 128;
rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
atomic_set(&rt->u.dst.__refcnt, 1); atomic_set(&rt->u.dst.__refcnt, 1);
@ -1611,9 +1707,7 @@ static int fib6_ifdown(struct rt6_info *rt, void *arg)
void rt6_ifdown(struct net_device *dev) void rt6_ifdown(struct net_device *dev)
{ {
write_lock_bh(&rt6_lock); fib6_clean_all(fib6_ifdown, 0, dev);
fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
write_unlock_bh(&rt6_lock);
} }
struct rt6_mtu_change_arg struct rt6_mtu_change_arg
@ -1663,13 +1757,12 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
void rt6_mtu_change(struct net_device *dev, unsigned mtu) void rt6_mtu_change(struct net_device *dev, unsigned mtu)
{ {
struct rt6_mtu_change_arg arg; struct rt6_mtu_change_arg arg = {
.dev = dev,
.mtu = mtu,
};
arg.dev = dev; fib6_clean_all(rt6_mtu_change_route, 0, &arg);
arg.mtu = mtu;
read_lock_bh(&rt6_lock);
fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
read_unlock_bh(&rt6_lock);
} }
static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta, static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
@ -1719,7 +1812,7 @@ int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
if (inet6_rtm_to_rtmsg(r, arg, &rtmsg)) if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
return -EINVAL; return -EINVAL;
return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb)); return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb), r->rtm_table);
} }
int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
@ -1729,7 +1822,7 @@ int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
if (inet6_rtm_to_rtmsg(r, arg, &rtmsg)) if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
return -EINVAL; return -EINVAL;
return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb)); return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb), r->rtm_table);
} }
struct rt6_rtnl_dump_arg struct rt6_rtnl_dump_arg
@ -1761,6 +1854,10 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
rtm->rtm_dst_len = rt->rt6i_dst.plen; rtm->rtm_dst_len = rt->rt6i_dst.plen;
rtm->rtm_src_len = rt->rt6i_src.plen; rtm->rtm_src_len = rt->rt6i_src.plen;
rtm->rtm_tos = 0; rtm->rtm_tos = 0;
if (rt->rt6i_table)
rtm->rtm_table = rt->rt6i_table->tb6_id;
else
rtm->rtm_table = RT6_TABLE_UNSPEC;
rtm->rtm_table = RT_TABLE_MAIN; rtm->rtm_table = RT_TABLE_MAIN;
if (rt->rt6i_flags&RTF_REJECT) if (rt->rt6i_flags&RTF_REJECT)
rtm->rtm_type = RTN_UNREACHABLE; rtm->rtm_type = RTN_UNREACHABLE;
@ -1868,7 +1965,6 @@ static void fib6_dump_end(struct netlink_callback *cb)
if (w) { if (w) {
cb->args[0] = 0; cb->args[0] = 0;
fib6_walker_unlink(w);
kfree(w); kfree(w);
} }
cb->done = (void*)cb->args[1]; cb->done = (void*)cb->args[1];
@ -1883,13 +1979,20 @@ static int fib6_dump_done(struct netlink_callback *cb)
int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{ {
struct fib6_table *table;
struct rt6_rtnl_dump_arg arg; struct rt6_rtnl_dump_arg arg;
struct fib6_walker_t *w; struct fib6_walker_t *w;
int res; int i, res = 0;
arg.skb = skb; arg.skb = skb;
arg.cb = cb; arg.cb = cb;
/*
* cb->args[0] = pointer to walker structure
* cb->args[1] = saved cb->done() pointer
* cb->args[2] = current table being dumped
*/
w = (void*)cb->args[0]; w = (void*)cb->args[0];
if (w == NULL) { if (w == NULL) {
/* New dump: /* New dump:
@ -1905,24 +2008,48 @@ int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
w = kzalloc(sizeof(*w), GFP_ATOMIC); w = kzalloc(sizeof(*w), GFP_ATOMIC);
if (w == NULL) if (w == NULL)
return -ENOMEM; return -ENOMEM;
RT6_TRACE("dump<%p", w);
w->root = &ip6_routing_table;
w->func = fib6_dump_node; w->func = fib6_dump_node;
w->args = &arg; w->args = &arg;
cb->args[0] = (long)w; cb->args[0] = (long)w;
read_lock_bh(&rt6_lock); cb->args[2] = FIB6_TABLE_MIN;
res = fib6_walk(w);
read_unlock_bh(&rt6_lock);
} else { } else {
w->args = &arg; w->args = &arg;
read_lock_bh(&rt6_lock); i = cb->args[2];
res = fib6_walk_continue(w); if (i > FIB6_TABLE_MAX)
read_unlock_bh(&rt6_lock); goto end;
table = fib6_get_table(i);
if (table != NULL) {
read_lock_bh(&table->tb6_lock);
w->root = &table->tb6_root;
res = fib6_walk_continue(w);
read_unlock_bh(&table->tb6_lock);
if (res != 0) {
if (res < 0)
fib6_walker_unlink(w);
goto end;
}
}
fib6_walker_unlink(w);
cb->args[2] = ++i;
} }
#if RT6_DEBUG >= 3
if (res <= 0 && skb->len == 0) for (i = cb->args[2]; i <= FIB6_TABLE_MAX; i++) {
RT6_TRACE("%p>dump end\n", w); table = fib6_get_table(i);
#endif if (table == NULL)
continue;
read_lock_bh(&table->tb6_lock);
w->root = &table->tb6_root;
res = fib6_walk(w);
read_unlock_bh(&table->tb6_lock);
if (res)
break;
}
end:
cb->args[2] = i;
res = res < 0 ? res : skb->len; res = res < 0 ? res : skb->len;
/* res < 0 is an error. (really, impossible) /* res < 0 is an error. (really, impossible)
res == 0 means that dump is complete, but skb still can contain data. res == 0 means that dump is complete, but skb still can contain data.
@ -2102,16 +2229,13 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg)
static int rt6_proc_info(char *buffer, char **start, off_t offset, int length) static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
{ {
struct rt6_proc_arg arg; struct rt6_proc_arg arg = {
arg.buffer = buffer; .buffer = buffer,
arg.offset = offset; .offset = offset,
arg.length = length; .length = length,
arg.skip = 0; };
arg.len = 0;
read_lock_bh(&rt6_lock); fib6_clean_all(rt6_info_route, 0, &arg);
fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
read_unlock_bh(&rt6_lock);
*start = buffer; *start = buffer;
if (offset) if (offset)