net: support marking accepting TCP sockets

When using mark-based routing, sockets returned from accept()
may need to be marked differently depending on the incoming
connection request.

This is the case, for example, if different socket marks identify
different networks: a listening socket may want to accept
connections from all networks, but each connection should be
marked with the network that the request came in on, so that
subsequent packets are sent on the correct network.

This patch adds a sysctl to mark TCP sockets based on the fwmark
of the incoming SYN packet. If enabled, and an unmarked socket
receives a SYN, then the SYN packet's fwmark is written to the
connection's inet_request_sock, and later written back to the
accepted socket when the connection is established.  If the
socket already has a nonzero mark, then the behaviour is the same
as it is today, i.e., the listening socket's fwmark is used.

Black-box tested using user-mode linux:

- IPv4/IPv6 SYN+ACK, FIN, etc. packets are routed based on the
  mark of the incoming SYN packet.
- The socket returned by accept() is marked with the mark of the
  incoming SYN packet.
- Tested with syncookies=1 and syncookies=2.

Change-Id: I26bc1eceefd2c588d73b921865ab70e4645ade57
Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
Git-commit: 6ba3a0e3b112bdb47858e97aa763706ba26ca5ea
Git-repo: https://android.googlesource.com/kernel/common.git
Signed-off-by: Ian Maund <imaund@codeaurora.org>
This commit is contained in:
Lorenzo Colitti 2014-03-26 13:03:12 +09:00 committed by Ian Maund
parent 9366fad7a2
commit c5f40c905b
10 changed files with 39 additions and 5 deletions

View file

@ -477,6 +477,16 @@ tcp_fastopen - INTEGER
See include/net/tcp.h and the code for more details.
tcp_fwmark_accept - BOOLEAN
If set, incoming connections to listening sockets that do not have a
socket mark will set the mark of the accepting socket to the fwmark of
the incoming SYN packet. This will cause all packets on that connection
(starting from the first SYNACK) to be sent with that fwmark. The
listening socket's mark is unchanged. Listening sockets that already
have a fwmark set via setsockopt(SOL_SOCKET, SO_MARK, ...) are
unaffected.
Default: 0
tcp_syn_retries - INTEGER
Number of times initial SYNs for an active TCP connection attempt
will be retransmitted. Should not be higher than 255. Default value

View file

@ -88,6 +88,7 @@ struct inet_request_sock {
acked : 1,
no_srccheck: 1;
kmemcheck_bitfield_end(flags);
u32 ir_mark;
struct ip_options_rcu *opt;
};
@ -96,6 +97,14 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
return (struct inet_request_sock *)sk;
}
static inline u32 inet_request_mark(struct sock *sk, struct sk_buff *skb)
{
if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)
return skb->mark;
return sk->sk_mark;
}
struct inet_cork {
unsigned int flags;
__be32 addr;

View file

@ -65,6 +65,7 @@ struct netns_ipv4 {
int sysctl_tcp_ecn;
int sysctl_fwmark_reflect;
int sysctl_tcp_fwmark_accept;
kgid_t sysctl_ping_group_range[2];
long sysctl_tcp_mem[3];

View file

@ -417,7 +417,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
struct net *net = sock_net(sk);
int flags = inet_sk_flowi_flags(sk);
flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
flowi4_init_output(fl4, sk->sk_bound_dev_if, ireq->ir_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
sk->sk_protocol,
flags,
@ -454,7 +454,7 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
rcu_read_lock();
opt = rcu_dereference(newinet->inet_opt);
flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
flowi4_init_output(fl4, sk->sk_bound_dev_if, inet_rsk(req)->ir_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
sk->sk_protocol, inet_sk_flowi_flags(sk),
(opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr,
@ -688,6 +688,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
inet_sk(newsk)->inet_sport = inet_rsk(req)->loc_port;
newsk->sk_write_space = sk_stream_write_space;
newsk->sk_mark = inet_rsk(req)->ir_mark;
newicsk->icsk_retransmits = 0;
newicsk->icsk_backoff = 0;
newicsk->icsk_probes_out = 0;

View file

@ -312,6 +312,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
ireq->rmt_port = th->source;
ireq->loc_addr = ip_hdr(skb)->daddr;
ireq->rmt_addr = ip_hdr(skb)->saddr;
ireq->ir_mark = inet_request_mark(sk, skb);
ireq->ecn_ok = ecn_ok;
ireq->snd_wscale = tcp_opt.snd_wscale;
ireq->sack_ok = tcp_opt.sack_ok;
@ -348,7 +349,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
* hasn't changed since we received the original syn, but I see
* no easy way to do this.
*/
flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark,
flowi4_init_output(&fl4, sk->sk_bound_dev_if, ireq->ir_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP,
inet_sk_flowi_flags(sk),
(opt && opt->srr) ? opt->faddr : ireq->rmt_addr,

View file

@ -903,6 +903,13 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "tcp_fwmark_accept",
.data = &init_net.ipv4.sysctl_tcp_fwmark_accept,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{ }
};

View file

@ -1527,6 +1527,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
ireq->rmt_addr = saddr;
ireq->no_srccheck = inet_sk(sk)->transparent;
ireq->opt = tcp_v4_save_options(skb);
ireq->ir_mark = inet_request_mark(sk, skb);
if (security_inet_conn_request(sk, skb, req))
goto drop_and_free;

View file

@ -81,7 +81,7 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk,
final_p = fl6_update_dst(fl6, np->opt, &final);
fl6->saddr = treq->loc_addr;
fl6->flowi6_oif = treq->iif;
fl6->flowi6_mark = sk->sk_mark;
fl6->flowi6_mark = inet_rsk(req)->ir_mark;
fl6->fl6_dport = inet_rsk(req)->rmt_port;
fl6->fl6_sport = inet_rsk(req)->loc_port;
security_req_classify_flow(req, flowi6_to_flowi(fl6));

View file

@ -212,6 +212,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
ipv6_addr_type(&ireq6->rmt_addr) & IPV6_ADDR_LINKLOCAL)
ireq6->iif = inet6_iif(skb);
ireq->ir_mark = inet_request_mark(sk, skb);
req->expires = 0UL;
req->num_retrans = 0;
ireq->ecn_ok = ecn_ok;
@ -238,7 +240,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
final_p = fl6_update_dst(&fl6, np->opt, &final);
fl6.saddr = ireq6->loc_addr;
fl6.flowi6_oif = sk->sk_bound_dev_if;
fl6.flowi6_mark = sk->sk_mark;
fl6.flowi6_mark = ireq->ir_mark;
fl6.fl6_dport = inet_rsk(req)->rmt_port;
fl6.fl6_sport = inet_sk(sk)->inet_sport;
security_req_classify_flow(req, flowi6_to_flowi(&fl6));

View file

@ -1000,6 +1000,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
TCP_ECN_create_request(req, skb, sock_net(sk));
treq->iif = sk->sk_bound_dev_if;
inet_rsk(req)->ir_mark = inet_request_mark(sk, skb);
/* So that link locals have meaning */
if (!sk->sk_bound_dev_if &&