net: add a sysctl to reflect the fwmark on replies
Kernel-originated IP packets that have no user socket associated with them (e.g., ICMP errors and echo replies, TCP RSTs, etc.) are emitted with a mark of zero. Add a sysctl to make them have the same mark as the packet they are replying to. This allows an administrator that wishes to do so to use mark-based routing, firewalling, etc. for these replies by marking the original packets inbound. Tested using user-mode linux: - ICMP/ICMPv6 echo replies and errors. - TCP RST packets (IPv4 and IPv6). Change-Id: I6873d973196797bcf32e2e91976df647c7e8b85a Signed-off-by: Lorenzo Colitti <lorenzo@google.com> Git-commit: 5a87fa6a43733e241406e8d62fe28fdc0735bf93 Git-repo: https://android.googlesource.com/kernel/common.git [imaund@codeaurora.org: Resolve trivial merge conflicts] Signed-off-by: Ian Maund <imaund@codeaurora.org>
This commit is contained in:
parent
93686b2549
commit
4ecfac314b
|
@ -22,6 +22,13 @@ ip_no_pmtu_disc - BOOLEAN
|
|||
min_pmtu - INTEGER
|
||||
default 552 - minimum discovered Path MTU
|
||||
|
||||
fwmark_reflect - BOOLEAN
|
||||
Controls the fwmark of kernel-generated IPv4 reply packets that are not
|
||||
associated with a socket (for example, TCP RSTs or ICMP echo replies).
|
||||
If unset, these packets have a fwmark of zero. If set, they have the
|
||||
fwmark of the packet they are replying to.
|
||||
Default: 0
|
||||
|
||||
route/max_size - INTEGER
|
||||
Maximum number of routes allowed in the kernel. Increase
|
||||
this when using large numbers of interfaces and/or routes.
|
||||
|
@ -1099,6 +1106,13 @@ proxy_ndp - INTEGER
|
|||
2 NDP packets are sent to userspace, where a userspace proxy
|
||||
can be implemented
|
||||
|
||||
fwmark_reflect - BOOLEAN
|
||||
Controls the fwmark of kernel-generated IPv6 reply packets that are not
|
||||
associated with a socket (for example, TCP RSTs or ICMPv6 echo replies).
|
||||
If unset, these packets have a fwmark of zero. If set, they have the
|
||||
fwmark of the packet they are replying to.
|
||||
Default: 0
|
||||
|
||||
conf/interface/*:
|
||||
Change special settings per interface.
|
||||
|
||||
|
|
|
@ -225,6 +225,9 @@ extern void ipfrag_init(void);
|
|||
|
||||
extern void ip_static_sysctl_init(void);
|
||||
|
||||
/*
 * Select the fwmark for a kernel-generated IPv4 reply: evaluates to (mark)
 * when (net)->ipv4.sysctl_fwmark_reflect is non-zero, and to 0 otherwise.
 */
#define IP4_REPLY_MARK(net, mark) \
|
||||
((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0)
|
||||
|
||||
static inline bool ip_is_fragment(const struct iphdr *iph)
|
||||
{
|
||||
return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0;
|
||||
|
|
|
@ -111,6 +111,9 @@ struct frag_hdr {
|
|||
|
||||
#define IP6_MF 0x0001
|
||||
|
||||
/*
 * Select the fwmark for a kernel-generated IPv6 reply: evaluates to (mark)
 * when (net)->ipv6.sysctl.fwmark_reflect is non-zero, and to 0 otherwise.
 */
#define IP6_REPLY_MARK(net, mark) \
|
||||
((net)->ipv6.sysctl.fwmark_reflect ? (mark) : 0)
|
||||
|
||||
#include <net/sock.h>
|
||||
|
||||
/* sysctls */
|
||||
|
|
|
@ -64,6 +64,8 @@ struct netns_ipv4 {
|
|||
|
||||
int sysctl_tcp_ecn;
|
||||
|
||||
int sysctl_fwmark_reflect;
|
||||
|
||||
kgid_t sysctl_ping_group_range[2];
|
||||
long sysctl_tcp_mem[3];
|
||||
|
||||
|
|
|
@ -28,6 +28,7 @@ struct netns_sysctl_ipv6 {
|
|||
int ip6_rt_mtu_expires;
|
||||
int ip6_rt_min_advmss;
|
||||
int icmpv6_time;
|
||||
int fwmark_reflect;
|
||||
};
|
||||
|
||||
struct netns_ipv6 {
|
||||
|
|
|
@ -337,6 +337,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
|
|||
struct sock *sk;
|
||||
struct inet_sock *inet;
|
||||
__be32 daddr, saddr;
|
||||
u32 mark = IP4_REPLY_MARK(net, skb->mark);
|
||||
|
||||
if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb))
|
||||
return;
|
||||
|
@ -349,6 +350,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
|
|||
icmp_param->data.icmph.checksum = 0;
|
||||
|
||||
inet->tos = ip_hdr(skb)->tos;
|
||||
sk->sk_mark = mark;
|
||||
daddr = ipc.addr = ip_hdr(skb)->saddr;
|
||||
saddr = fib_compute_spec_dst(skb);
|
||||
ipc.opt = NULL;
|
||||
|
@ -361,6 +363,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
|
|||
memset(&fl4, 0, sizeof(fl4));
|
||||
fl4.daddr = daddr;
|
||||
fl4.saddr = saddr;
|
||||
fl4.flowi4_mark = mark;
|
||||
fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
|
||||
fl4.flowi4_proto = IPPROTO_ICMP;
|
||||
security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
|
||||
|
@ -379,7 +382,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
|
|||
struct flowi4 *fl4,
|
||||
struct sk_buff *skb_in,
|
||||
const struct iphdr *iph,
|
||||
__be32 saddr, u8 tos,
|
||||
__be32 saddr, u8 tos, u32 mark,
|
||||
int type, int code,
|
||||
struct icmp_bxm *param)
|
||||
{
|
||||
|
@ -391,6 +394,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
|
|||
fl4->daddr = (param->replyopts.opt.opt.srr ?
|
||||
param->replyopts.opt.opt.faddr : iph->saddr);
|
||||
fl4->saddr = saddr;
|
||||
fl4->flowi4_mark = mark;
|
||||
fl4->flowi4_tos = RT_TOS(tos);
|
||||
fl4->flowi4_proto = IPPROTO_ICMP;
|
||||
fl4->fl4_icmp_type = type;
|
||||
|
@ -488,6 +492,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
|
|||
struct flowi4 fl4;
|
||||
__be32 saddr;
|
||||
u8 tos;
|
||||
u32 mark;
|
||||
struct net *net;
|
||||
struct sock *sk;
|
||||
|
||||
|
@ -584,6 +589,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
|
|||
tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) |
|
||||
IPTOS_PREC_INTERNETCONTROL) :
|
||||
iph->tos;
|
||||
mark = IP4_REPLY_MARK(net, skb_in->mark);
|
||||
|
||||
if (ip_options_echo(&icmp_param.replyopts.opt.opt, skb_in))
|
||||
goto out_unlock;
|
||||
|
@ -600,11 +606,12 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
|
|||
icmp_param.skb = skb_in;
|
||||
icmp_param.offset = skb_network_offset(skb_in);
|
||||
inet_sk(sk)->tos = tos;
|
||||
sk->sk_mark = mark;
|
||||
ipc.addr = iph->saddr;
|
||||
ipc.opt = &icmp_param.replyopts.opt;
|
||||
ipc.tx_flags = 0;
|
||||
|
||||
rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos,
|
||||
rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark,
|
||||
type, code, &icmp_param);
|
||||
if (IS_ERR(rt))
|
||||
goto out_unlock;
|
||||
|
|
|
@ -1497,7 +1497,8 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
|
|||
daddr = replyopts.opt.opt.faddr;
|
||||
}
|
||||
|
||||
flowi4_init_output(&fl4, arg->bound_dev_if, 0,
|
||||
flowi4_init_output(&fl4, arg->bound_dev_if,
|
||||
IP4_REPLY_MARK(net, skb->mark),
|
||||
RT_TOS(arg->tos),
|
||||
RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol,
|
||||
ip_reply_arg_flowi_flags(arg),
|
||||
|
|
|
@ -896,6 +896,13 @@ static struct ctl_table ipv4_net_table[] = {
|
|||
.mode = 0644,
|
||||
.proc_handler = ipv4_tcp_mem,
|
||||
},
|
||||
{
|
||||
.procname = "fwmark_reflect",
|
||||
.data = &init_net.ipv4.sysctl_fwmark_reflect,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
{ }
|
||||
};
|
||||
|
||||
|
|
|
@ -397,6 +397,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
|
|||
int len;
|
||||
int hlimit;
|
||||
int err = 0;
|
||||
u32 mark = IP6_REPLY_MARK(net, skb->mark);
|
||||
|
||||
if ((u8 *)hdr < skb->head ||
|
||||
(skb->network_header + sizeof(*hdr)) > skb->tail)
|
||||
|
@ -462,6 +463,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
|
|||
fl6.daddr = hdr->saddr;
|
||||
if (saddr)
|
||||
fl6.saddr = *saddr;
|
||||
fl6.flowi6_mark = mark;
|
||||
fl6.flowi6_oif = iif;
|
||||
fl6.fl6_icmp_type = type;
|
||||
fl6.fl6_icmp_code = code;
|
||||
|
@ -470,6 +472,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
|
|||
sk = icmpv6_xmit_lock(net);
|
||||
if (sk == NULL)
|
||||
return;
|
||||
sk->sk_mark = mark;
|
||||
np = inet6_sk(sk);
|
||||
|
||||
if (!icmpv6_xrlim_allow(sk, type, &fl6))
|
||||
|
@ -551,6 +554,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
|
|||
struct dst_entry *dst;
|
||||
int err = 0;
|
||||
int hlimit;
|
||||
u32 mark = IP6_REPLY_MARK(net, skb->mark);
|
||||
|
||||
saddr = &ipv6_hdr(skb)->daddr;
|
||||
|
||||
|
@ -567,11 +571,13 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
|
|||
fl6.saddr = *saddr;
|
||||
fl6.flowi6_oif = skb->dev->ifindex;
|
||||
fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
|
||||
fl6.flowi6_mark = mark;
|
||||
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
|
||||
|
||||
sk = icmpv6_xmit_lock(net);
|
||||
if (sk == NULL)
|
||||
return;
|
||||
sk->sk_mark = mark;
|
||||
np = inet6_sk(sk);
|
||||
|
||||
if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
|
||||
|
|
|
@ -24,6 +24,13 @@ static ctl_table ipv6_table_template[] = {
|
|||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec
|
||||
},
|
||||
{
|
||||
.procname = "fwmark_reflect",
|
||||
.data = &init_net.ipv6.sysctl.fwmark_reflect,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec
|
||||
},
|
||||
{ }
|
||||
};
|
||||
|
||||
|
|
|
@ -791,6 +791,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
|
|||
fl6.flowi6_proto = IPPROTO_TCP;
|
||||
if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
|
||||
fl6.flowi6_oif = inet6_iif(skb);
|
||||
fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
|
||||
fl6.fl6_dport = t1->dest;
|
||||
fl6.fl6_sport = t1->source;
|
||||
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
|
||||
|
|
Loading…
Reference in New Issue