From 7c56badc9a66aecf61a2aad3fa23ce0599e04196 Mon Sep 17 00:00:00 2001 From: Roger Hu Date: Sat, 4 Sep 2021 09:17:12 -0700 Subject: [PATCH 01/12] kernel: Revert "tcp: do not lock listener to process SYN packets" This commit belongs to the patch set (https://lwn.net/Articles/659199/) that attempts to remove the use of locks on the socket table by relocating the SYN table to a separate hash table and adding a spin lock to protect the SYN request queue. Adding only this commit introduces a race condition for LineageOS kernels for TCP listens, since the TCP SYN data structures can be corrupted. A TCP curl bomb on a TCP listen port will corrupt the SYN accept backlog: for i in $(seq 1 400); do curl -x localhost:443 https://myhost.com -L --connect-timeout 30 -o /dev/null -sS & done Run `ss -nltp` and usually the RecVQ column does not drain to 0. This reverts commit 7d9f104f9cabe1d72a50c4816a48f64fc1da7a64. This really needs to be reverted across all LineageOS forks: https://gitlab.com/LineageOS/issues/android/-/issues/3916#note_669493796 Change-Id: Ia7969aeedae411677b307a8e094f9a4cc02b801d --- net/ipv4/tcp_ipv4.c | 8 +------- net/ipv6/tcp_ipv6.c | 8 +------- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 600c447fef23..019b35231420 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1589,7 +1589,7 @@ static __sum16 tcp_v4_checksum_init(struct sk_buff *skb) /* The socket must have it's spinlock held when we get - * here, unless it is a TCP_LISTEN socket. + * here. * * We have a potential double-lock case here, so even when * doing backlog processing we use the BH locking scheme. @@ -1728,11 +1728,6 @@ process: skb->dev = NULL; - if (sk->sk_state == TCP_LISTEN) { - ret = tcp_v4_do_rcv(sk, skb); - goto put_and_return; - } - bh_lock_sock_nested(sk); tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); ret = 0; @@ -1756,7 +1751,6 @@ process: } bh_unlock_sock(sk); -put_and_return: sock_put(sk); return ret; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5c140ad29aca..67b8629060c4 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1464,7 +1464,7 @@ static __sum16 tcp_v6_checksum_init(struct sk_buff *skb) } /* The socket must have it's spinlock held when we get - * here, unless it is a TCP_LISTEN socket. + * here. * * We have a potential double-lock case here, so even when * doing backlog processing we use the BH locking scheme. @@ -1658,11 +1658,6 @@ process: skb->dev = NULL; - if (sk->sk_state == TCP_LISTEN) { - ret = tcp_v6_do_rcv(sk, skb); - goto put_and_return; - } - bh_lock_sock_nested(sk); tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); ret = 0; @@ -1686,7 +1681,6 @@ process: } bh_unlock_sock(sk); -put_and_return: sock_put(sk); return ret ? -1 : 0; From 9587d567d2086eb0ff5d192406d7067ffc0d1c10 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 30 Jul 2012 14:42:38 -0700 Subject: [PATCH 02/12] BACKPORT: ipc: add COMPAT_SHMLBA support If the SHMLBA definition for a native task differs from the definition for a compat task, the do_shmat() function would need to handle both. This patch introduces COMPAT_SHMLBA, which is used by the compat shmat syscall when calling the ipc code and allows architectures such as AArch64 (where the native SHMLBA is 64k but the compat (AArch32) definition is 16k) to provide the correct semantics for compat IPC system calls. Change-Id: I0292f1fedabaa6cbbab843611aa76a8f50f47771 Cc: David S. 
Miller Cc: Chris Zankel Cc: Arnd Bergmann Acked-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Kevin F. Haggerty --- arch/sparc/kernel/sys_sparc_64.c | 2 +- arch/xtensa/kernel/syscall.c | 2 +- include/linux/shm.h | 6 ++++-- ipc/compat.c | 8 ++++++-- ipc/shm.c | 11 ++++++----- ipc/syscall.c | 2 +- 6 files changed, 19 insertions(+), 12 deletions(-) diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 2348cc06c6c3..47253f68b054 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -487,7 +487,7 @@ SYSCALL_DEFINE6(sparc_ipc, unsigned int, call, int, first, unsigned long, second switch (call) { case SHMAT: { ulong raddr; - err = do_shmat(first, ptr, (int)second, &raddr); + err = do_shmat(first, ptr, (int)second, &raddr, SHMLBA); if (!err) { if (put_user(raddr, (ulong __user *) third)) diff --git a/arch/xtensa/kernel/syscall.c b/arch/xtensa/kernel/syscall.c index 816e6d0d686c..05b3f093d5d7 100644 --- a/arch/xtensa/kernel/syscall.c +++ b/arch/xtensa/kernel/syscall.c @@ -44,7 +44,7 @@ asmlinkage long xtensa_shmat(int shmid, char __user *shmaddr, int shmflg) unsigned long ret; long err; - err = do_shmat(shmid, shmaddr, shmflg, &ret); + err = do_shmat(shmid, shmaddr, shmflg, &ret, SHMLBA); if (err) return err; return (long)ret; diff --git a/include/linux/shm.h b/include/linux/shm.h index 92808b86703b..edd086883ccb 100644 --- a/include/linux/shm.h +++ b/include/linux/shm.h @@ -107,12 +107,14 @@ struct shmid_kernel /* private to the kernel */ #define SHM_NORESERVE 010000 /* don't check for reservations */ #ifdef CONFIG_SYSVIPC -long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr); +long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr, + unsigned long shmlba); extern int is_file_shm_hugepages(struct file *file); extern void exit_shm(struct task_struct *task); #else static inline long do_shmat(int shmid, char __user *shmaddr, - int shmflg, unsigned long *addr) + int shmflg, unsigned long *addr, + unsigned long shmlba) { return -ENOSYS; } diff --git a/ipc/compat.c b/ipc/compat.c index a6df704f521e..53cebdf80e3c 100644 --- a/ipc/compat.c +++ b/ipc/compat.c @@ -514,6 +514,10 @@ long compat_sys_msgctl(int first, int second, void __user *uptr) return err; } +#ifndef COMPAT_SHMLBA +#define COMPAT_SHMLBA SHMLBA +#endif + #ifdef CONFIG_ARCH_WANT_OLD_COMPAT_IPC long compat_sys_shmat(int first, int second, compat_uptr_t third, int version, void __user *uptr) @@ -524,7 +528,7 @@ long compat_sys_shmat(int first, int second, compat_uptr_t third, int version, if (version == 1) return -EINVAL; - err = do_shmat(first, uptr, second, &raddr); + err = do_shmat(first, uptr, second, &raddr, COMPAT_SHMLBA); if (err < 0) return err; uaddr = compat_ptr(third); @@ -536,7 +540,7 @@ long compat_sys_shmat(int shmid, compat_uptr_t shmaddr, int shmflg) unsigned long ret; long err; - err = do_shmat(shmid, compat_ptr(shmaddr), shmflg, &ret); + err = do_shmat(shmid, compat_ptr(shmaddr), shmflg, &ret, COMPAT_SHMLBA); if (err) return err; force_successful_syscall_return(); diff --git a/ipc/shm.c b/ipc/shm.c index b4ac3dc95e0f..3c245a9e67bd 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -944,7 +944,8 @@ out: * "raddr" thing points to kernel space, and there has to be a wrapper around * this. 
*/ -long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) +long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, + unsigned long shmlba) { struct shmid_kernel *shp; unsigned long addr; @@ -964,14 +965,14 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) if (shmid < 0) goto out; else if ((addr = (ulong)shmaddr)) { - if (addr & (SHMLBA-1)) { + if (addr & (shmlba - 1)) { /* * Round down to the nearest multiple of shmlba. * For sane do_mmap_pgoff() parameters, avoid * round downs that trigger nil-page and MAP_FIXED. */ - if ((shmflg & SHM_RND) && addr >= SHMLBA) - addr &= ~(SHMLBA - 1); + if ((shmflg & SHM_RND) && addr >= shmlba) + addr &= ~(shmlba - 1); else #ifndef __ARCH_FORCE_SHMLBA if (addr & ~PAGE_MASK) @@ -1098,7 +1099,7 @@ SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg) unsigned long ret; long err; - err = do_shmat(shmid, shmaddr, shmflg, &ret); + err = do_shmat(shmid, shmaddr, shmflg, &ret, SHMLBA); if (err) return err; force_successful_syscall_return(); diff --git a/ipc/syscall.c b/ipc/syscall.c index 1d6f53f6b562..0d1e32ce048e 100644 --- a/ipc/syscall.c +++ b/ipc/syscall.c @@ -73,7 +73,7 @@ SYSCALL_DEFINE6(ipc, unsigned int, call, int, first, unsigned long, second, default: { unsigned long raddr; ret = do_shmat(first, (char __user *)ptr, - second, &raddr); + second, &raddr, SHMLBA); if (ret) return ret; return put_user(raddr, (unsigned long __user *) third); From cab65020e8af8a146f0e30749837716c72920259 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 16 May 2012 22:48:15 +0000 Subject: [PATCH 03/12] BACKPORT: net: include/net/sock.h cleanup bool/const conversions where possible __inline__ -> inline space cleanups Change-Id: I0ee0135e737edd702f753fac182b293ec5cc652a Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Kevin F. Haggerty --- include/net/sock.h | 151 ++++++++++++++++++++++----------------------- net/dccp/proto.c | 4 +- net/ipv4/tcp.c | 12 ++-- net/llc/af_llc.c | 4 +- 4 files changed, 84 insertions(+), 87 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 33b19e674504..1ca2381bd85e 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -98,7 +98,7 @@ void mem_cgroup_sockets_destroy(struct cgroup *cgrp) #else /* Validate arguments and do nothing */ static inline __printf(2, 3) -void SOCK_DEBUG(struct sock *sk, const char *msg, ...) +void SOCK_DEBUG(const struct sock *sk, const char *msg, ...) 
{ } #endif @@ -380,8 +380,8 @@ struct sock { void (*sk_data_ready)(struct sock *sk, int bytes); void (*sk_write_space)(struct sock *sk); void (*sk_error_report)(struct sock *sk); - int (*sk_backlog_rcv)(struct sock *sk, - struct sk_buff *skb); + int (*sk_backlog_rcv)(struct sock *sk, + struct sk_buff *skb); void (*sk_destruct)(struct sock *sk); }; @@ -451,40 +451,40 @@ static inline struct sock *sk_nulls_next(const struct sock *sk) NULL; } -static inline int sk_unhashed(const struct sock *sk) +static inline bool sk_unhashed(const struct sock *sk) { return hlist_unhashed(&sk->sk_node); } -static inline int sk_hashed(const struct sock *sk) +static inline bool sk_hashed(const struct sock *sk) { return !sk_unhashed(sk); } -static __inline__ void sk_node_init(struct hlist_node *node) +static inline void sk_node_init(struct hlist_node *node) { node->pprev = NULL; } -static __inline__ void sk_nulls_node_init(struct hlist_nulls_node *node) +static inline void sk_nulls_node_init(struct hlist_nulls_node *node) { node->pprev = NULL; } -static __inline__ void __sk_del_node(struct sock *sk) +static inline void __sk_del_node(struct sock *sk) { __hlist_del(&sk->sk_node); } /* NB: equivalent to hlist_del_init_rcu */ -static __inline__ int __sk_del_node_init(struct sock *sk) +static inline bool __sk_del_node_init(struct sock *sk) { if (sk_hashed(sk)) { __sk_del_node(sk); sk_node_init(&sk->sk_node); - return 1; + return true; } - return 0; + return false; } /* Grab socket reference count. This operation is valid only @@ -506,9 +506,9 @@ static inline void __sock_put(struct sock *sk) atomic_dec(&sk->sk_refcnt); } -static __inline__ int sk_del_node_init(struct sock *sk) +static inline bool sk_del_node_init(struct sock *sk) { - int rc = __sk_del_node_init(sk); + bool rc = __sk_del_node_init(sk); if (rc) { /* paranoid for a while -acme */ @@ -519,18 +519,18 @@ static __inline__ int sk_del_node_init(struct sock *sk) } #define sk_del_node_init_rcu(sk) sk_del_node_init(sk) -static __inline__ int __sk_nulls_del_node_init_rcu(struct sock *sk) +static inline bool __sk_nulls_del_node_init_rcu(struct sock *sk) { if (sk_hashed(sk)) { hlist_nulls_del_init_rcu(&sk->sk_nulls_node); - return 1; + return true; } - return 0; + return false; } -static __inline__ int sk_nulls_del_node_init_rcu(struct sock *sk) +static inline bool sk_nulls_del_node_init_rcu(struct sock *sk) { - int rc = __sk_nulls_del_node_init_rcu(sk); + bool rc = __sk_nulls_del_node_init_rcu(sk); if (rc) { /* paranoid for a while -acme */ @@ -540,40 +540,40 @@ static __inline__ int sk_nulls_del_node_init_rcu(struct sock *sk) return rc; } -static __inline__ void __sk_add_node(struct sock *sk, struct hlist_head *list) +static inline void __sk_add_node(struct sock *sk, struct hlist_head *list) { hlist_add_head(&sk->sk_node, list); } -static __inline__ void sk_add_node(struct sock *sk, struct hlist_head *list) +static inline void sk_add_node(struct sock *sk, struct hlist_head *list) { sock_hold(sk); __sk_add_node(sk, list); } -static __inline__ void sk_add_node_rcu(struct sock *sk, struct hlist_head *list) +static inline void sk_add_node_rcu(struct sock *sk, struct hlist_head *list) { sock_hold(sk); hlist_add_head_rcu(&sk->sk_node, list); } -static __inline__ void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) +static inline void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) { hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); } -static __inline__ void sk_nulls_add_node_rcu(struct sock *sk, struct 
hlist_nulls_head *list) +static inline void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) { sock_hold(sk); __sk_nulls_add_node_rcu(sk, list); } -static __inline__ void __sk_del_bind_node(struct sock *sk) +static inline void __sk_del_bind_node(struct sock *sk) { __hlist_del(&sk->sk_bind_node); } -static __inline__ void sk_add_bind_node(struct sock *sk, +static inline void sk_add_bind_node(struct sock *sk, struct hlist_head *list) { hlist_add_head(&sk->sk_bind_node, list); @@ -662,7 +662,7 @@ static inline void sk_acceptq_added(struct sock *sk) sk->sk_ack_backlog++; } -static inline int sk_acceptq_is_full(struct sock *sk) +static inline bool sk_acceptq_is_full(const struct sock *sk) { return sk->sk_ack_backlog > sk->sk_max_ack_backlog; } @@ -670,19 +670,19 @@ static inline int sk_acceptq_is_full(struct sock *sk) /* * Compute minimal free write space needed to queue new packets. */ -static inline int sk_stream_min_wspace(struct sock *sk) +static inline int sk_stream_min_wspace(const struct sock *sk) { return sk->sk_wmem_queued >> 1; } -static inline int sk_stream_wspace(struct sock *sk) +static inline int sk_stream_wspace(const struct sock *sk) { return sk->sk_sndbuf - sk->sk_wmem_queued; } extern void sk_stream_write_space(struct sock *sk); -static inline int sk_stream_memory_free(struct sock *sk) +static inline bool sk_stream_memory_free(const struct sock *sk) { return sk->sk_wmem_queued < sk->sk_sndbuf; } @@ -816,26 +816,26 @@ static inline void sk_prot_clear_nulls(struct sock *sk, int size) * transport -> network interface is defined by struct inet_proto */ struct proto { - void (*close)(struct sock *sk, + void (*close)(struct sock *sk, long timeout); int (*connect)(struct sock *sk, - struct sockaddr *uaddr, + struct sockaddr *uaddr, int addr_len); int (*disconnect)(struct sock *sk, int flags); - struct sock * (*accept) (struct sock *sk, int flags, int *err); + struct sock * (*accept)(struct sock *sk, int flags, int *err); int (*ioctl)(struct sock *sk, int cmd, unsigned long arg); int (*init)(struct sock *sk); void (*destroy)(struct sock *sk); void (*shutdown)(struct sock *sk, int how); - int (*setsockopt)(struct sock *sk, int level, + int (*setsockopt)(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen); - int (*getsockopt)(struct sock *sk, int level, - int optname, char __user *optval, - int __user *option); + int (*getsockopt)(struct sock *sk, int level, + int optname, char __user *optval, + int __user *option); #ifdef CONFIG_COMPAT int (*compat_setsockopt)(struct sock *sk, int level, @@ -852,14 +852,14 @@ struct proto { struct msghdr *msg, size_t len); int (*recvmsg)(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, - int *addr_len); + size_t len, int noblock, int flags, + int *addr_len); int (*sendpage)(struct sock *sk, struct page *page, int offset, size_t size, int flags); - int (*bind)(struct sock *sk, + int (*bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len); - int (*backlog_rcv) (struct sock *sk, + int (*backlog_rcv) (struct sock *sk, struct sk_buff *skb); void (*release_cb)(struct sock *sk); @@ -1183,7 +1183,7 @@ proto_memory_pressure(struct proto *prot) extern void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc); extern int sock_prot_inuse_get(struct net *net, struct proto *proto); #else -static void inline sock_prot_inuse_add(struct net *net, struct proto *prot, +static inline void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc) { 
} @@ -1270,24 +1270,24 @@ static inline int sk_mem_pages(int amt) return (amt + SK_MEM_QUANTUM - 1) >> SK_MEM_QUANTUM_SHIFT; } -static inline int sk_has_account(struct sock *sk) +static inline bool sk_has_account(struct sock *sk) { /* return true if protocol supports memory accounting */ return !!sk->sk_prot->memory_allocated; } -static inline int sk_wmem_schedule(struct sock *sk, int size) +static inline bool sk_wmem_schedule(struct sock *sk, int size) { if (!sk_has_account(sk)) - return 1; + return true; return size <= sk->sk_forward_alloc || __sk_mem_schedule(sk, size, SK_MEM_SEND); } -static inline int sk_rmem_schedule(struct sock *sk, int size) +static inline bool sk_rmem_schedule(struct sock *sk, int size) { if (!sk_has_account(sk)) - return 1; + return true; return size <= sk->sk_forward_alloc || __sk_mem_schedule(sk, size, SK_MEM_RECV); } @@ -1352,7 +1352,7 @@ static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb) * Mark both the sk_lock and the sk_lock.slock as a * per-address-family lock class. */ -#define sock_lock_init_class_and_name(sk, sname, skey, name, key) \ +#define sock_lock_init_class_and_name(sk, sname, skey, name, key) \ do { \ sk->sk_lock.owned = 0; \ init_waitqueue_head(&sk->sk_lock.wq); \ @@ -1360,7 +1360,7 @@ do { \ debug_check_no_locks_freed((void *)&(sk)->sk_lock, \ sizeof((sk)->sk_lock)); \ lockdep_set_class_and_name(&(sk)->sk_lock.slock, \ - (skey), (sname)); \ + (skey), (sname)); \ lockdep_init_map(&(sk)->sk_lock.dep_map, (name), (key), 0); \ } while (0) @@ -1420,13 +1420,13 @@ extern int sock_setsockopt(struct socket *sock, int level, unsigned int optlen); extern int sock_getsockopt(struct socket *sock, int level, - int op, char __user *optval, + int op, char __user *optval, int __user *optlen); -extern struct sk_buff *sock_alloc_send_skb(struct sock *sk, +extern struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, int noblock, int *errcode); -extern struct sk_buff *sock_alloc_send_pskb(struct sock *sk, +extern struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, unsigned long data_len, int noblock, @@ -1448,7 +1448,7 @@ static inline void sock_update_classid(struct sock *sk) * Functions to fill in entries in struct proto_ops when a protocol * does not implement a particular function. 
*/ -extern int sock_no_bind(struct socket *, +extern int sock_no_bind(struct socket *, struct sockaddr *, int); extern int sock_no_connect(struct socket *, struct sockaddr *, int, int); @@ -1477,7 +1477,7 @@ extern int sock_no_mmap(struct file *file, struct vm_area_struct *vma); extern ssize_t sock_no_sendpage(struct socket *sock, struct page *page, - int offset, size_t size, + int offset, size_t size, int flags); /* @@ -1500,7 +1500,7 @@ extern void sk_common_release(struct sock *sk); /* * Default socket callbacks and setup code */ - + /* Initialise core socket variables */ extern void sock_init_data(struct socket *sock, struct sock *sk); @@ -1700,7 +1700,7 @@ extern struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie); extern struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie); -static inline int sk_can_gso(const struct sock *sk) +static inline bool sk_can_gso(const struct sock *sk) { return net_gso_ok(sk->sk_route_caps, sk->sk_gso_type); } @@ -1817,7 +1817,7 @@ static inline int sk_rmem_alloc_get(const struct sock *sk) * * Returns true if socket has write or read allocations */ -static inline int sk_has_allocations(const struct sock *sk) +static inline bool sk_has_allocations(const struct sock *sk) { return sk_wmem_alloc_get(sk) || sk_rmem_alloc_get(sk); } @@ -1856,9 +1856,7 @@ static inline int sk_has_allocations(const struct sock *sk) */ static inline bool wq_has_sleeper(struct socket_wq *wq) { - - /* - * We need to be sure we are in sync with the + /* We need to be sure we are in sync with the * add_wait_queue modifications to the wait queue. * * This memory barrier is paired in the sock_poll_wait. @@ -1880,22 +1878,21 @@ static inline void sock_poll_wait(struct file *filp, { if (!poll_does_not_wait(p) && wait_address) { poll_wait(filp, wait_address, p); - /* - * We need to be sure we are in sync with the + /* We need to be sure we are in sync with the * socket flags modification. * * This memory barrier is paired in the wq_has_sleeper. - */ + */ smp_mb(); } } /* - * Queue a received datagram if it will fit. Stream and sequenced + * Queue a received datagram if it will fit. Stream and sequenced * protocols can't normally use this as they need to fit buffers in * and play with them. * - * Inlined as it's very short and called for pretty much every + * Inlined as it's very short and called for pretty much every * packet ever received. 
*/ @@ -1921,10 +1918,10 @@ static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk) sk_mem_charge(sk, skb->truesize); } -extern void sk_reset_timer(struct sock *sk, struct timer_list* timer, +extern void sk_reset_timer(struct sock *sk, struct timer_list *timer, unsigned long expires); -extern void sk_stop_timer(struct sock *sk, struct timer_list* timer); +extern void sk_stop_timer(struct sock *sk, struct timer_list *timer); extern int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); @@ -1933,7 +1930,7 @@ extern int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb); /* * Recover an error report and clear atomically */ - + static inline int sock_error(struct sock *sk) { int err; @@ -1949,7 +1946,7 @@ static inline unsigned long sock_wspace(struct sock *sk) if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { amt = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); - if (amt < 0) + if (amt < 0) amt = 0; } return amt; @@ -1993,7 +1990,7 @@ static inline struct page *sk_stream_alloc_page(struct sock *sk) /* * Default write policy as shown to user space via poll/select/SIGIO */ -static inline int sock_writeable(const struct sock *sk) +static inline bool sock_writeable(const struct sock *sk) { return atomic_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1); } @@ -2003,12 +2000,12 @@ static inline gfp_t gfp_any(void) return in_softirq() ? GFP_ATOMIC : GFP_KERNEL; } -static inline long sock_rcvtimeo(const struct sock *sk, int noblock) +static inline long sock_rcvtimeo(const struct sock *sk, bool noblock) { return noblock ? 0 : sk->sk_rcvtimeo; } -static inline long sock_sndtimeo(const struct sock *sk, int noblock) +static inline long sock_sndtimeo(const struct sock *sk, bool noblock) { return noblock ? 0 : sk->sk_sndtimeo; } @@ -2031,7 +2028,7 @@ extern void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, extern void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk, struct sk_buff *skb); -static __inline__ void +static inline void sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { ktime_t kt = skb->tstamp; @@ -2072,7 +2069,7 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, (1UL << SOCK_RCVTSTAMP) | \ (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \ (1UL << SOCK_TIMESTAMPING_SOFTWARE) | \ - (1UL << SOCK_TIMESTAMPING_RAW_HARDWARE) | \ + (1UL << SOCK_TIMESTAMPING_RAW_HARDWARE) | \ (1UL << SOCK_TIMESTAMPING_SYS_HARDWARE)) if (sk->sk_flags & FLAGS_TS_OR_DROPS) @@ -2101,7 +2098,7 @@ extern int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags); * locked so that the sk_buff queue operation is ok. 
*/ #ifdef CONFIG_NET_DMA -static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, int copied_early) +static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, bool copied_early) { __skb_unlink(skb, &sk->sk_receive_queue); if (!copied_early) @@ -2110,7 +2107,7 @@ static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, int copied_e __skb_queue_tail(&sk->sk_async_wait_queue, skb); } #else -static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, int copied_early) +static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, bool copied_early) { __skb_unlink(skb, &sk->sk_receive_queue); __kfree_skb(skb); @@ -2166,8 +2163,8 @@ extern void sock_enable_timestamp(struct sock *sk, int flag); extern int sock_get_timestamp(struct sock *, struct timeval __user *); extern int sock_get_timestampns(struct sock *, struct timespec __user *); -/* - * Enable debug/info messages +/* + * Enable debug/info messages */ extern int net_msg_warn; #define NETDEBUG(fmt, args...) \ diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 7065c0ae1e7b..6c7c78b83940 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -848,7 +848,7 @@ int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, default: dccp_pr_debug("packet_type=%s\n", dccp_packet_name(dh->dccph_type)); - sk_eat_skb(sk, skb, 0); + sk_eat_skb(sk, skb, false); } verify_sock_status: if (sock_flag(sk, SOCK_DONE)) { @@ -905,7 +905,7 @@ verify_sock_status: len = skb->len; found_fin_ok: if (!(flags & MSG_PEEK)) - sk_eat_skb(sk, skb, 0); + sk_eat_skb(sk, skb, false); break; } while (1); out: diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index fa0d078a677e..16b90ddeae55 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1413,11 +1413,11 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, break; } if (tcp_hdr(skb)->fin) { - sk_eat_skb(sk, skb, 0); + sk_eat_skb(sk, skb, false); ++seq; break; } - sk_eat_skb(sk, skb, 0); + sk_eat_skb(sk, skb, false); if (!desc->count) break; tp->copied_seq = seq; @@ -1456,7 +1456,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, int target; /* Read at least this many bytes */ long timeo; struct task_struct *user_recv = NULL; - int copied_early = 0; + bool copied_early = false; struct sk_buff *skb; u32 urg_hole = 0; @@ -1735,7 +1735,7 @@ do_prequeue: dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); if ((offset + used) == skb->len) - copied_early = 1; + copied_early = true; } else #endif @@ -1769,7 +1769,7 @@ skip_copy: goto found_fin_ok; if (!(flags & MSG_PEEK)) { sk_eat_skb(sk, skb, copied_early); - copied_early = 0; + copied_early = false; } continue; @@ -1778,7 +1778,7 @@ skip_copy: ++*seq; if (!(flags & MSG_PEEK)) { sk_eat_skb(sk, skb, copied_early); - copied_early = 0; + copied_early = false; } break; } while (len > 0); diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 1c2dc50c9b44..0ccf79f5e8cf 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -841,7 +841,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, if (!(flags & MSG_PEEK)) { spin_lock_irqsave(&sk->sk_receive_queue.lock, cpu_flags); - sk_eat_skb(sk, skb, 0); + sk_eat_skb(sk, skb, false); spin_unlock_irqrestore(&sk->sk_receive_queue.lock, cpu_flags); *seq = 0; } @@ -864,7 +864,7 @@ copy_uaddr: if (!(flags & MSG_PEEK)) { spin_lock_irqsave(&sk->sk_receive_queue.lock, cpu_flags); - sk_eat_skb(sk, skb, 0); + sk_eat_skb(sk, skb, false); spin_unlock_irqrestore(&sk->sk_receive_queue.lock, cpu_flags); *seq = 0; } From 
e9ff904465117484fca9c1eec7ccacb37a38fbce Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 14 Apr 2013 08:08:13 +0000 Subject: [PATCH 04/12] BACKPORT: net: sock: make sock_tx_timestamp void Currently, sock_tx_timestamp() always returns 0. The comment that describes the sock_tx_timestamp() function wrongly says that it returns an error when an invalid argument is passed (from commit 20d4947353be, ``net: socket infrastructure for SO_TIMESTAMPING''). Make the function void, so that we can also remove all the unneeded if conditions that check for such a _non-existant_ error case in the output path. Change-Id: Ibdfd5071737190371d4abec5ae76046b5aa8de23 Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller Signed-off-by: Kevin F. Haggerty --- include/net/sock.h | 5 ++--- net/can/raw.c | 5 ++--- net/ipv4/ping.c | 5 ++--- net/ipv4/udp.c | 6 +++--- net/ipv6/ip6_output.c | 7 ++----- net/packet/af_packet.c | 10 ++++------ net/socket.c | 3 +-- 7 files changed, 16 insertions(+), 25 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 1ca2381bd85e..14df41307829 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2083,10 +2083,9 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, * @sk: socket sending this packet * @tx_flags: filled with instructions for time stamping * - * Currently only depends on SOCK_TIMESTAMPING* flags. Returns error code if - * parameters are invalid. + * Currently only depends on SOCK_TIMESTAMPING* flags. */ -extern int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags); +extern void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags); /** * sk_eat_skb - Release a skb if it is no longer needed diff --git a/net/can/raw.c b/net/can/raw.c index 46cca3a91d19..2fa3d8accda4 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -677,9 +677,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); if (err < 0) goto free_skb; - err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); - if (err < 0) - goto free_skb; + + sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); skb->dev = dev; skb->sk = sk; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 76dbeda40efd..8743c3945960 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -738,9 +738,8 @@ int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.opt = NULL; ipc.oif = sk->sk_bound_dev_if; ipc.tx_flags = 0; - err = sock_tx_timestamp(sk, &ipc.tx_flags); - if (err) - return err; + + sock_tx_timestamp(sk, &ipc.tx_flags); if (msg->msg_controllen) { err = ip_cmsg_send(sock_net(sk), msg, &ipc); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index e62f006783a9..88cbcaf0e9f7 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -872,9 +872,9 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.addr = inet->inet_saddr; ipc.oif = sk->sk_bound_dev_if; - err = sock_tx_timestamp(sk, &ipc.tx_flags); - if (err) - return err; + + sock_tx_timestamp(sk, &ipc.tx_flags); + if (msg->msg_controllen) { err = ip_cmsg_send(sock_net(sk), msg, &ipc); if (err) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 5c00c36fea45..24bde89ad4bc 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1314,11 +1314,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, } /* For UDP, check if TX timestamp is enabled */ - if (sk->sk_type == SOCK_DGRAM) { - err = sock_tx_timestamp(sk, &tx_flags); - if (err) - goto error; - } + if (sk->sk_type 
== SOCK_DGRAM) + sock_tx_timestamp(sk, &tx_flags); /* * Let's try using as much space as possible. diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index bb7e38c57092..b844803f4017 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1543,9 +1543,8 @@ retry: skb->dev = dev; skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); - if (err < 0) - goto out_unlock; + + sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); if (unlikely(extra_len == 4)) skb->no_fcs = 1; @@ -2327,9 +2326,8 @@ static int packet_snd(struct socket *sock, err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len); if (err) goto out_free; - err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); - if (err < 0) - goto out_free; + + sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); if (!gso_type && (len > dev->mtu + reserve + extra_len)) { /* Earlier code assumed this would be a VLAN pkt, diff --git a/net/socket.c b/net/socket.c index def8541a02d5..579ff143b8a8 100644 --- a/net/socket.c +++ b/net/socket.c @@ -535,7 +535,7 @@ void sock_release(struct socket *sock) } EXPORT_SYMBOL(sock_release); -int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) +void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) { *tx_flags = 0; if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE)) @@ -544,7 +544,6 @@ int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) *tx_flags |= SKBTX_SW_TSTAMP; if (sock_flag(sk, SOCK_WIFI_STATUS)) *tx_flags |= SKBTX_WIFI_STATUS; - return 0; } EXPORT_SYMBOL(sock_tx_timestamp); From 721c4f160d53209e7735ff125bfa8370327e20d1 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 22 Sep 2013 10:32:26 -0700 Subject: [PATCH 05/12] BACKPORT: sock.h: Remove extern from function prototypes There are a mix of function prototypes with and without extern in the kernel sources. Standardize on not using extern for function prototypes. Function prototypes don't need to be written with extern. extern is assumed by the compiler. Its use is as unnecessary as using auto to declare automatic/local variables in a block. Change-Id: I65041ae9f8472dc9f84f5f8e09dc3ac859c7d05a Signed-off-by: Joe Perches Signed-off-by: David S. Miller Signed-off-by: Adrian DC Signed-off-by: Kevin F. 
Haggerty --- include/net/sock.h | 204 ++++++++++++++++++++------------------------- 1 file changed, 90 insertions(+), 114 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 14df41307829..79bc531ee5e7 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -680,7 +680,7 @@ static inline int sk_stream_wspace(const struct sock *sk) return sk->sk_sndbuf - sk->sk_wmem_queued; } -extern void sk_stream_write_space(struct sock *sk); +void sk_stream_write_space(struct sock *sk); static inline bool sk_stream_memory_free(const struct sock *sk) { @@ -785,13 +785,13 @@ static inline void sock_rps_reset_rxhash(struct sock *sk) __rc; \ }) -extern int sk_stream_wait_connect(struct sock *sk, long *timeo_p); -extern int sk_stream_wait_memory(struct sock *sk, long *timeo_p); -extern void sk_stream_wait_close(struct sock *sk, long timeo_p); -extern int sk_stream_error(struct sock *sk, int flags, int err); -extern void sk_stream_kill_queues(struct sock *sk); +int sk_stream_wait_connect(struct sock *sk, long *timeo_p); +int sk_stream_wait_memory(struct sock *sk, long *timeo_p); +void sk_stream_wait_close(struct sock *sk, long timeo_p); +int sk_stream_error(struct sock *sk, int flags, int err); +void sk_stream_kill_queues(struct sock *sk); -extern int sk_wait_data(struct sock *sk, long *timeo); +int sk_wait_data(struct sock *sk, long *timeo); struct request_sock_ops; struct timewait_sock_ops; @@ -949,8 +949,8 @@ struct cg_proto { struct mem_cgroup *memcg; }; -extern int proto_register(struct proto *prot, int alloc_slab); -extern void proto_unregister(struct proto *prot); +int proto_register(struct proto *prot, int alloc_slab); +void proto_unregister(struct proto *prot); #ifdef SOCK_REFCNT_DEBUG static inline void sk_refcnt_debug_inc(struct sock *sk) @@ -1180,8 +1180,8 @@ proto_memory_pressure(struct proto *prot) #ifdef CONFIG_PROC_FS /* Called with local bh disabled */ -extern void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc); -extern int sock_prot_inuse_get(struct net *net, struct proto *proto); +void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc); +int sock_prot_inuse_get(struct net *net, struct proto *proto); #else static inline void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc) @@ -1257,8 +1257,8 @@ static inline struct inode *SOCK_INODE(struct socket *socket) /* * Functions for memory accounting */ -extern int __sk_mem_schedule(struct sock *sk, int size, int kind); -extern void __sk_mem_reclaim(struct sock *sk); +int __sk_mem_schedule(struct sock *sk, int size, int kind); +void __sk_mem_reclaim(struct sock *sk); #define SK_MEM_QUANTUM ((int)PAGE_SIZE) #define SK_MEM_QUANTUM_SHIFT ilog2(SK_MEM_QUANTUM) @@ -1364,14 +1364,14 @@ do { \ lockdep_init_map(&(sk)->sk_lock.dep_map, (name), (key), 0); \ } while (0) -extern void lock_sock_nested(struct sock *sk, int subclass); +void lock_sock_nested(struct sock *sk, int subclass); static inline void lock_sock(struct sock *sk) { lock_sock_nested(sk, 0); } -extern void release_sock(struct sock *sk); +void release_sock(struct sock *sk); /* BH context may only use the following locking interface. 
*/ #define bh_lock_sock(__sk) spin_lock(&((__sk)->sk_lock.slock)) @@ -1380,7 +1380,7 @@ extern void release_sock(struct sock *sk); SINGLE_DEPTH_NESTING) #define bh_unlock_sock(__sk) spin_unlock(&((__sk)->sk_lock.slock)) -extern bool lock_sock_fast(struct sock *sk); +bool lock_sock_fast(struct sock *sk); /** * unlock_sock_fast - complement of lock_sock_fast * @sk: socket @@ -1398,46 +1398,35 @@ static inline void unlock_sock_fast(struct sock *sk, bool slow) } -extern struct sock *sk_alloc(struct net *net, int family, - gfp_t priority, - struct proto *prot); -extern void sk_free(struct sock *sk); -extern void sk_release_kernel(struct sock *sk); -extern struct sock *sk_clone_lock(const struct sock *sk, - const gfp_t priority); +struct sock *sk_alloc(struct net *net, int family, gfp_t priority, + struct proto *prot); +void sk_free(struct sock *sk); +void sk_release_kernel(struct sock *sk); +struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority); -extern struct sk_buff *sock_wmalloc(struct sock *sk, - unsigned long size, int force, - gfp_t priority); -extern struct sk_buff *sock_rmalloc(struct sock *sk, - unsigned long size, int force, - gfp_t priority); -extern void sock_wfree(struct sk_buff *skb); -extern void sock_rfree(struct sk_buff *skb); +struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, + gfp_t priority); +struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, + gfp_t priority); +void sock_wfree(struct sk_buff *skb); +void sock_rfree(struct sk_buff *skb); -extern int sock_setsockopt(struct socket *sock, int level, - int op, char __user *optval, - unsigned int optlen); +int sock_setsockopt(struct socket *sock, int level, int op, + char __user *optval, unsigned int optlen); -extern int sock_getsockopt(struct socket *sock, int level, - int op, char __user *optval, - int __user *optlen); -extern struct sk_buff *sock_alloc_send_skb(struct sock *sk, - unsigned long size, - int noblock, - int *errcode); -extern struct sk_buff *sock_alloc_send_pskb(struct sock *sk, - unsigned long header_len, - unsigned long data_len, - int noblock, - int *errcode); -extern void *sock_kmalloc(struct sock *sk, int size, - gfp_t priority); -extern void sock_kfree_s(struct sock *sk, void *mem, int size); -extern void sk_send_sigurg(struct sock *sk); +int sock_getsockopt(struct socket *sock, int level, int op, + char __user *optval, int __user *optlen); +struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, + int noblock, int *errcode); +struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, + unsigned long data_len, int noblock, + int *errcode); +void *sock_kmalloc(struct sock *sk, int size, gfp_t priority); +void sock_kfree_s(struct sock *sk, void *mem, int size); +void sk_send_sigurg(struct sock *sk); #ifdef CONFIG_CGROUPS -extern void sock_update_classid(struct sock *sk); +void sock_update_classid(struct sock *sk); #else static inline void sock_update_classid(struct sock *sk) { @@ -1448,63 +1437,51 @@ static inline void sock_update_classid(struct sock *sk) * Functions to fill in entries in struct proto_ops when a protocol * does not implement a particular function. 
*/ -extern int sock_no_bind(struct socket *, - struct sockaddr *, int); -extern int sock_no_connect(struct socket *, - struct sockaddr *, int, int); -extern int sock_no_socketpair(struct socket *, - struct socket *); -extern int sock_no_accept(struct socket *, - struct socket *, int); -extern int sock_no_getname(struct socket *, - struct sockaddr *, int *, int); -extern unsigned int sock_no_poll(struct file *, struct socket *, - struct poll_table_struct *); -extern int sock_no_ioctl(struct socket *, unsigned int, - unsigned long); -extern int sock_no_listen(struct socket *, int); -extern int sock_no_shutdown(struct socket *, int); -extern int sock_no_getsockopt(struct socket *, int , int, - char __user *, int __user *); -extern int sock_no_setsockopt(struct socket *, int, int, - char __user *, unsigned int); -extern int sock_no_sendmsg(struct kiocb *, struct socket *, - struct msghdr *, size_t); -extern int sock_no_recvmsg(struct kiocb *, struct socket *, - struct msghdr *, size_t, int); -extern int sock_no_mmap(struct file *file, - struct socket *sock, - struct vm_area_struct *vma); -extern ssize_t sock_no_sendpage(struct socket *sock, - struct page *page, - int offset, size_t size, - int flags); +int sock_no_bind(struct socket *, struct sockaddr *, int); +int sock_no_connect(struct socket *, struct sockaddr *, int, int); +int sock_no_socketpair(struct socket *, struct socket *); +int sock_no_accept(struct socket *, struct socket *, int); +int sock_no_getname(struct socket *, struct sockaddr *, int *, int); +unsigned int sock_no_poll(struct file *, struct socket *, + struct poll_table_struct *); +int sock_no_ioctl(struct socket *, unsigned int, unsigned long); +int sock_no_listen(struct socket *, int); +int sock_no_shutdown(struct socket *, int); +int sock_no_getsockopt(struct socket *, int , int, char __user *, int __user *); +int sock_no_setsockopt(struct socket *, int, int, char __user *, unsigned int); +int sock_no_sendmsg(struct kiocb *, struct socket *, struct msghdr *, size_t); +int sock_no_recvmsg(struct kiocb *, struct socket *, struct msghdr *, size_t, + int); +int sock_no_mmap(struct file *file, struct socket *sock, + struct vm_area_struct *vma); +ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, + size_t size, int flags); /* * Functions to fill in entries in struct proto_ops when a protocol * uses the inet style. 
*/ -extern int sock_common_getsockopt(struct socket *sock, int level, int optname, +int sock_common_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen); -extern int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock, +int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags); -extern int sock_common_setsockopt(struct socket *sock, int level, int optname, +int sock_common_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen); -extern int compat_sock_common_getsockopt(struct socket *sock, int level, +int compat_sock_common_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen); -extern int compat_sock_common_setsockopt(struct socket *sock, int level, +int compat_sock_common_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen); -extern void sk_common_release(struct sock *sk); +void sk_common_release(struct sock *sk); /* * Default socket callbacks and setup code */ /* Initialise core socket variables */ -extern void sock_init_data(struct socket *sock, struct sock *sk); +void sock_init_data(struct socket *sock, struct sock *sk); -extern void sk_filter_release_rcu(struct rcu_head *rcu); +void sk_filter_release_rcu(struct rcu_head *rcu); /** * sk_filter_release - release a socket filter @@ -1565,8 +1542,7 @@ static inline void sock_put(struct sock *sk) sk_free(sk); } -extern int sk_receive_skb(struct sock *sk, struct sk_buff *skb, - const int nested); +int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested); static inline void sk_tx_queue_set(struct sock *sk, int tx_queue) { @@ -1620,8 +1596,8 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) write_unlock_bh(&sk->sk_callback_lock); } -extern int sock_i_uid(struct sock *sk); -extern unsigned long sock_i_ino(struct sock *sk); +int sock_i_uid(struct sock *sk); +unsigned long sock_i_ino(struct sock *sk); static inline struct dst_entry * __sk_dst_get(struct sock *sk) @@ -1643,7 +1619,7 @@ sk_dst_get(struct sock *sk) return dst; } -extern void sk_reset_txq(struct sock *sk); +void sk_reset_txq(struct sock *sk); static inline void dst_negative_advice(struct sock *sk) { @@ -1696,16 +1672,16 @@ sk_dst_reset(struct sock *sk) spin_unlock(&sk->sk_dst_lock); } -extern struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie); +struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie); -extern struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie); +struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie); static inline bool sk_can_gso(const struct sock *sk) { return net_gso_ok(sk->sk_route_caps, sk->sk_gso_type); } -extern void sk_setup_caps(struct sock *sk, struct dst_entry *dst); +void sk_setup_caps(struct sock *sk, struct dst_entry *dst); static inline void sk_nocaps_add(struct sock *sk, netdev_features_t flags) { @@ -1918,14 +1894,14 @@ static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk) sk_mem_charge(sk, skb->truesize); } -extern void sk_reset_timer(struct sock *sk, struct timer_list *timer, - unsigned long expires); +void sk_reset_timer(struct sock *sk, struct timer_list *timer, + unsigned long expires); -extern void sk_stop_timer(struct sock *sk, struct timer_list *timer); +void sk_stop_timer(struct sock *sk, struct timer_list *timer); -extern int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); +int sock_queue_rcv_skb(struct sock *sk, 
struct sk_buff *skb); -extern int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb); +int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb); /* * Recover an error report and clear atomically @@ -2023,10 +1999,10 @@ static inline int sock_intr_errno(long timeo) return timeo == MAX_SCHEDULE_TIMEOUT ? -ERESTARTSYS : -EINTR; } -extern void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, - struct sk_buff *skb); -extern void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk, - struct sk_buff *skb); +void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb); +void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb); static inline void sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) @@ -2059,8 +2035,8 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) __sock_recv_wifi_status(msg, sk, skb); } -extern void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, - struct sk_buff *skb); +void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb); static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) @@ -2085,7 +2061,7 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, * * Currently only depends on SOCK_TIMESTAMPING* flags. */ -extern void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags); +void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags); /** * sk_eat_skb - Release a skb if it is no longer needed @@ -2158,9 +2134,9 @@ static inline bool sk_fullsock(const struct sock *sk) return (1 << sk->sk_state) & ~(TCPF_TIME_WAIT); } -extern void sock_enable_timestamp(struct sock *sk, int flag); -extern int sock_get_timestamp(struct sock *, struct timeval __user *); -extern int sock_get_timestampns(struct sock *, struct timespec __user *); +void sock_enable_timestamp(struct sock *sk, int flag); +int sock_get_timestamp(struct sock *, struct timeval __user *); +int sock_get_timestampns(struct sock *, struct timespec __user *); /* * Enable debug/info messages @@ -2175,7 +2151,7 @@ extern int net_msg_warn; extern __u32 sysctl_wmem_max; extern __u32 sysctl_rmem_max; -extern void sk_init(void); +void sk_init(void); extern int sysctl_optmem_max; From 82794d1071c88ad21c559e08401477093d8d63fe Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 24 Jul 2015 18:19:25 +0200 Subject: [PATCH 06/12] BACKPORT: tcp: fix recv with flags MSG_WAITALL | MSG_PEEK Currently, tcp_recvmsg enters a busy loop in sk_wait_data if called with flags = MSG_WAITALL | MSG_PEEK. sk_wait_data waits for sk_receive_queue not empty, but in this case, the receive queue is not empty, but does not contain any skb that we can use. Add a "last skb seen on receive queue" argument to sk_wait_data, so that it sleeps until the receive queue has new skbs. Change-Id: If58492ae474effe058541f7e9a0c03dc24155393 Link: https://bugzilla.kernel.org/show_bug.cgi?id=99461 Link: https://sourceware.org/bugzilla/show_bug.cgi?id=18493 Link: https://bugzilla.redhat.com/show_bug.cgi?id=1205258 Reported-by: Enrico Scholz Reported-by: Dan Searle Signed-off-by: Sabrina Dubroca Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Kevin F. 
Haggerty --- include/net/sock.h | 2 +- net/core/sock.c | 5 +++-- net/dccp/proto.c | 2 +- net/ipv4/tcp.c | 11 +++++++---- net/llc/af_llc.c | 4 ++-- 5 files changed, 14 insertions(+), 10 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 79bc531ee5e7..cbb2148c4d2a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -791,7 +791,7 @@ void sk_stream_wait_close(struct sock *sk, long timeo_p); int sk_stream_error(struct sock *sk, int flags, int err); void sk_stream_kill_queues(struct sock *sk); -int sk_wait_data(struct sock *sk, long *timeo); +int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb); struct request_sock_ops; struct timewait_sock_ops; diff --git a/net/core/sock.c b/net/core/sock.c index 173922919ec1..a6b002f12b3e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1739,20 +1739,21 @@ static void __release_sock(struct sock *sk) * sk_wait_data - wait for data to arrive at sk_receive_queue * @sk: sock to wait on * @timeo: for how long + * @skb: last skb seen on sk_receive_queue * * Now socket state including sk->sk_err is changed only under lock, * hence we may omit checks after joining wait queue. * We check receive queue before schedule() only as optimization; * it is very likely that release_sock() added new data. */ -int sk_wait_data(struct sock *sk, long *timeo) +int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb) { int rc; DEFINE_WAIT(wait); prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); - rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue)); + rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb); clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); finish_wait(sk_sleep(sk), &wait); return rc; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 6c7c78b83940..ffff2cf23e5d 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -888,7 +888,7 @@ verify_sock_status: break; } - sk_wait_data(sk, &timeo); + sk_wait_data(sk, &timeo, NULL); continue; found_ok_skb: if (len > skb->len) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 16b90ddeae55..62a508c8a7d8 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -677,7 +677,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, */ if (!skb_queue_empty(&sk->sk_receive_queue)) break; - sk_wait_data(sk, &timeo); + sk_wait_data(sk, &timeo, NULL); if (signal_pending(current)) { ret = sock_intr_errno(timeo); break; @@ -1457,7 +1457,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, long timeo; struct task_struct *user_recv = NULL; bool copied_early = false; - struct sk_buff *skb; + struct sk_buff *skb, *last; u32 urg_hole = 0; lock_sock(sk); @@ -1517,7 +1517,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, /* Next get a buffer. */ + last = skb_peek_tail(&sk->sk_receive_queue); skb_queue_walk(&sk->sk_receive_queue, skb) { + last = skb; /* Now that we have two receive queues this * shouldn't happen. */ @@ -1646,8 +1648,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, /* Do not sleep, just process backlog. 
*/ release_sock(sk); lock_sock(sk); - } else - sk_wait_data(sk, &timeo); + } else { + sk_wait_data(sk, &timeo, last); + } #ifdef CONFIG_NET_DMA tcp_service_net_dma(sk, false); /* Don't block */ diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 0ccf79f5e8cf..92d33f9f4dbd 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -614,7 +614,7 @@ static int llc_wait_data(struct sock *sk, long timeo) if (signal_pending(current)) break; rc = 0; - if (sk_wait_data(sk, &timeo)) + if (sk_wait_data(sk, &timeo, NULL)) break; } return rc; @@ -803,7 +803,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, release_sock(sk); lock_sock(sk); } else - sk_wait_data(sk, &timeo); + sk_wait_data(sk, &timeo, NULL); if ((flags & MSG_PEEK) && peek_seq != llc->copied_seq) { if (net_ratelimit()) From 941eda2515fcf24e98ac86c352f8472dc96ca30f Mon Sep 17 00:00:00 2001 From: Masatake YAMATO Date: Wed, 29 Aug 2012 10:44:29 +0000 Subject: [PATCH 07/12] net: Providing protocol type via system.sockprotoname xattr of /proc/PID/fd entries lsof reports some of socket descriptors as "can't identify protocol" like: [yamato@localhost]/tmp% sudo lsof | grep dbus | grep iden dbus-daem 652 dbus 6u sock ... 17812 can't identify protocol dbus-daem 652 dbus 34u sock ... 24689 can't identify protocol dbus-daem 652 dbus 42u sock ... 24739 can't identify protocol dbus-daem 652 dbus 48u sock ... 22329 can't identify protocol ... lsof cannot resolve the protocol used in a socket because procfs doesn't provide the map between inode number on sockfs and protocol type of the socket. For improving the situation this patch adds an extended attribute named 'system.sockprotoname' in which the protocol name for /proc/PID/fd/SOCKET is stored. So lsof can know the protocol for a given /proc/PID/fd/SOCKET with getxattr system call. A few weeks ago I submitted a patch for the same purpose. The patch was introduced /proc/net/sockfs which enumerates inodes and protocols of all sockets alive on a system. However, it was rejected because (1) a global lock was needed, and (2) the layout of struct socket was changed with the patch. This patch doesn't use any global lock; and doesn't change the layout of any structs. In this patch, a protocol name is stored to dentry->d_name of sockfs when new socket is associated with a file descriptor. Before this patch dentry->d_name was not used; it was just filled with empty string. lsof may use an extended attribute named 'system.sockprotoname' to retrieve the value of dentry->d_name. It is nice if we can see the protocol name with ls -l /proc/PID/fd. However, "socket:[#INODE]", the name format returned from sockfs_dname() was already defined. To keep the compatibility between kernel and user land, the extended attribute is used to prepare the value of dentry->d_name. Change-Id: I04143ee6da5c236835a897086fc0de819abb0cdc Signed-off-by: Masatake YAMATO Signed-off-by: David S. Miller Signed-off-by: Kevin F. Haggerty --- net/socket.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 78 insertions(+), 5 deletions(-) diff --git a/net/socket.c b/net/socket.c index 579ff143b8a8..daaa138456e3 100644 --- a/net/socket.c +++ b/net/socket.c @@ -88,6 +88,7 @@ #include #include #include +#include #include #include @@ -347,7 +348,8 @@ static struct file_system_type sock_fs_type = { * but we take care of internal coherence yet. 
*/ -static int sock_alloc_file(struct socket *sock, struct file **f, int flags) +static int sock_alloc_file(struct socket *sock, struct file **f, int flags, + const char *dname) { struct qstr name = { .name = "" }; struct path path; @@ -358,6 +360,13 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags) if (unlikely(fd < 0)) return fd; + if (dname) { + name.name = dname; + name.len = strlen(name.name); + } else if (sock->sk) { + name.name = sock->sk->sk_prot_creator->name; + name.len = strlen(name.name); + } path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name); if (unlikely(!path.dentry)) { put_unused_fd(fd); @@ -390,7 +399,7 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags) int sock_map_fd(struct socket *sock, int flags) { struct file *newfile; - int fd = sock_alloc_file(sock, &newfile, flags); + int fd = sock_alloc_file(sock, &newfile, flags, NULL); if (likely(fd >= 0)) fd_install(fd, newfile); @@ -455,6 +464,68 @@ static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) return NULL; } +#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname" +#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX) +#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1) +static ssize_t sockfs_getxattr(struct dentry *dentry, + const char *name, void *value, size_t size) +{ + const char *proto_name; + size_t proto_size; + int error; + + error = -ENODATA; + if (!strncmp(name, XATTR_NAME_SOCKPROTONAME, XATTR_NAME_SOCKPROTONAME_LEN)) { + proto_name = dentry->d_name.name; + proto_size = strlen(proto_name); + + if (value) { + error = -ERANGE; + if (proto_size + 1 > size) + goto out; + + strncpy(value, proto_name, proto_size + 1); + } + error = proto_size + 1; + } + +out: + return error; +} + +static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer, + size_t size) +{ + ssize_t len; + ssize_t used = 0; + + len = security_inode_listsecurity(dentry->d_inode, buffer, size); + if (len < 0) + return len; + used += len; + if (buffer) { + if (size < used) + return -ERANGE; + buffer += len; + } + + len = (XATTR_NAME_SOCKPROTONAME_LEN + 1); + used += len; + if (buffer) { + if (size < used) + return -ERANGE; + memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len); + buffer += len; + } + + return used; +} + +static const struct inode_operations sockfs_inode_ops = { + .getxattr = sockfs_getxattr, + .listxattr = sockfs_listxattr, +}; + /** * sock_alloc - allocate a socket * @@ -479,6 +550,7 @@ static struct socket *sock_alloc(void) inode->i_mode = S_IFSOCK | S_IRWXUGO; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); + inode->i_op = &sockfs_inode_ops; percpu_add(sockets_in_use, 1); return sock; @@ -1396,13 +1468,13 @@ SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol, if (err < 0) goto out_release_both; - fd1 = sock_alloc_file(sock1, &newfile1, flags); + fd1 = sock_alloc_file(sock1, &newfile1, flags, NULL); if (unlikely(fd1 < 0)) { err = fd1; goto out_release_both; } - fd2 = sock_alloc_file(sock2, &newfile2, flags); + fd2 = sock_alloc_file(sock2, &newfile2, flags, NULL); if (unlikely(fd2 < 0)) { err = fd2; fput(newfile1); @@ -1538,7 +1610,8 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, */ __module_get(newsock->ops->owner); - newfd = sock_alloc_file(newsock, &newfile, flags); + newfd = sock_alloc_file(newsock, &newfile, flags, + sock->sk->sk_prot_creator->name); if (unlikely(newfd < 0)) { err = newfd; sock_release(newsock); From 
d3cd043531c8956c7a17b8796b44c3b5dafb2fb6 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Fri, 4 Nov 2016 02:23:41 +0900 Subject: [PATCH 08/12] net: core: Add a UID field to struct sock. Protocol sockets (struct sock) don't have UIDs, but most of the time, they map 1:1 to userspace sockets (struct socket) which do. Various operations such as the iptables xt_owner match need access to the "UID of a socket", and do so by following the backpointer to the struct socket. This involves taking sk_callback_lock and doesn't work when there is no socket because userspace has already called close(). Simplify this by adding a sk_uid field to struct sock whose value matches the UID of the corresponding struct socket. The semantics are as follows: 1. Whenever sk_socket is non-null: sk_uid is the same as the UID in sk_socket, i.e., matches the return value of sock_i_uid. Specifically, the UID is set when userspace calls socket(), fchown(), or accept(). 2. When sk_socket is NULL, sk_uid is defined as follows: - For a socket that no longer has a sk_socket because userspace has called close(): the previous UID. - For a cloned socket (e.g., an incoming connection that is established but on which userspace has not yet called accept): the UID of the socket it was cloned from. - For a socket that has never had an sk_socket: UID 0 inside the user namespace corresponding to the network namespace the socket belongs to. Kernel sockets created by sock_create_kern are a special case of #1 and sk_uid is the user that created them. For kernel sockets created at network namespace creation time, such as the per-processor ICMP and TCP sockets, this is the user that created the network namespace. [Backport of net-next 86741ec25462e4c8cdce6df2f41ead05568c7d5e] Bug: 16355602 Change-Id: I73e1a57dfeedf672f4c2dfc9ce6867838b55974b Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller Signed-off-by: Kevin F. Haggerty --- include/net/sock.h | 7 +++++++ net/core/sock.c | 5 ++++- net/socket.c | 14 ++++++++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/include/net/sock.h b/include/net/sock.h index cbb2148c4d2a..2cf496881715 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -374,6 +374,7 @@ struct sock { void *sk_security; #endif __u32 sk_mark; + kuid_t sk_uid; u32 sk_classid; struct cg_proto *sk_cgrp; void (*sk_state_change)(struct sock *sk); @@ -1592,6 +1593,7 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) sk->sk_wq = parent->wq; parent->sk = sk; sk_set_socket(sk, parent); + sk->sk_uid = SOCK_INODE(parent)->i_uid; security_sock_graft(sk, parent); write_unlock_bh(&sk->sk_callback_lock); } @@ -1599,6 +1601,11 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) int sock_i_uid(struct sock *sk); unsigned long sock_i_ino(struct sock *sk); +static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk) +{ + return sk ? 
sk->sk_uid : make_kuid(net->user_ns, 0); +} + static inline struct dst_entry * __sk_dst_get(struct sock *sk) { diff --git a/net/core/sock.c b/net/core/sock.c index a6b002f12b3e..2bf27ca04253 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2093,8 +2093,11 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_type = sock->type; sk->sk_wq = sock->wq; sock->sk = sk; - } else + sk->sk_uid = SOCK_INODE(sock)->i_uid; + } else { sk->sk_wq = NULL; + sk->sk_uid = make_kuid(sock_net(sk)->user_ns, 0); + } spin_lock_init(&sk->sk_dst_lock); rwlock_init(&sk->sk_callback_lock); diff --git a/net/socket.c b/net/socket.c index daaa138456e3..b05f7280fa7d 100644 --- a/net/socket.c +++ b/net/socket.c @@ -521,9 +521,23 @@ static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer, return used; } +int sockfs_setattr(struct dentry *dentry, struct iattr *iattr) +{ + int err = simple_setattr(dentry, iattr); + + if (!err) { + struct socket *sock = SOCKET_I(dentry->d_inode); + + sock->sk->sk_uid = iattr->ia_uid; + } + + return err; +} + static const struct inode_operations sockfs_inode_ops = { .getxattr = sockfs_getxattr, .listxattr = sockfs_listxattr, + .setattr = sockfs_setattr, }; /** From 1396cfea05c3815a587d4b958591f43a62ca2964 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 30 Dec 2016 17:42:32 -0600 Subject: [PATCH 09/12] net: socket: don't set sk_uid to garbage value in ->setattr() ->setattr() was recently implemented for socket files to sync the socket inode's uid to the new 'sk_uid' member of struct sock. It does this by copying over the ia_uid member of struct iattr. However, ia_uid is actually only valid when ATTR_UID is set in ia_valid, indicating that the uid is being changed, e.g. by chown. Other metadata operations such as chmod or utimes leave ia_uid uninitialized. Therefore, sk_uid could be set to a "garbage" value from the stack. Fix this by only copying the uid over when ATTR_UID is set. [backport of net e1a3a60a2ebe991605acb14cd58e39c0545e174e] Bug: 16355602 Change-Id: I20e53848e54282b72a388ce12bfa88da5e3e9efe Fixes: 86741ec25462 ("net: core: Add a UID field to struct sock.") Signed-off-by: Eric Biggers Tested-by: Lorenzo Colitti Acked-by: Lorenzo Colitti Signed-off-by: David S. Miller Signed-off-by: Kevin F. Haggerty --- net/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/socket.c b/net/socket.c index b05f7280fa7d..ea5cce8200b4 100644 --- a/net/socket.c +++ b/net/socket.c @@ -525,7 +525,7 @@ int sockfs_setattr(struct dentry *dentry, struct iattr *iattr) { int err = simple_setattr(dentry, iattr); - if (!err) { + if (!err && (iattr->ia_valid & ATTR_UID)) { struct socket *sock = SOCKET_I(dentry->d_inode); sock->sk->sk_uid = iattr->ia_uid; From afd1d2b38a8a4a43d31f0187be9b1aa9bee05715 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Fri, 4 Nov 2016 02:23:43 +0900 Subject: [PATCH 10/12] net: inet: Support UID-based routing in IP protocols. - Use the UID in routing lookups made by protocol connect() and sendmsg() functions. - Make sure that routing lookups triggered by incoming packets (e.g., Path MTU discovery) take the UID of the socket into account. - For packets not associated with a userspace socket, (e.g., ping replies) use UID 0 inside the user namespace corresponding to the network namespace the socket belongs to. This allows all namespaces to apply routing and iptables rules to kernel-originated traffic in that namespaces by matching UID 0. 
This is better than using the UID of the kernel socket that is sending the traffic, because the UID of kernel sockets created at namespace creation time (e.g., the per-processor ICMP and TCP sockets) is the UID of the user that created the socket, which might not be mapped in the namespace. Bug: 16355602 Change-Id: I910504b508948057912bc188fd1e8aca28294de3 Tested: compiles allnoconfig, allyesconfig, allmodconfig Tested: https://android-review.googlesource.com/253302 Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller Signed-off-by: Kevin F. Haggerty --- include/net/flow.h | 4 +++- include/net/ip.h | 1 + include/net/ip6_route.h | 2 +- include/net/route.h | 5 +++-- net/ipv4/icmp.c | 2 ++ net/ipv4/inet_connection_sock.c | 6 ++++-- net/ipv4/ip_output.c | 3 ++- net/ipv4/ping.c | 3 ++- net/ipv4/raw.c | 2 +- net/ipv4/syncookies.c | 2 +- net/ipv4/tcp_ipv4.c | 11 +++++++---- net/ipv4/udp.c | 3 ++- net/ipv6/af_inet6.c | 1 + net/ipv6/ah6.c | 2 +- net/ipv6/datagram.c | 1 + net/ipv6/esp6.c | 2 +- net/ipv6/icmp.c | 2 ++ net/ipv6/inet6_connection_sock.c | 2 ++ net/ipv6/ip6_tunnel.c | 3 +++ net/ipv6/ipcomp6.c | 2 +- net/ipv6/netfilter.c | 1 + net/ipv6/ping.c | 1 + net/ipv6/raw.c | 1 + net/ipv6/route.c | 5 +++-- net/ipv6/syncookies.c | 1 + net/ipv6/tcp_ipv6.c | 3 +++ net/ipv6/udp.c | 1 + 27 files changed, 52 insertions(+), 20 deletions(-) diff --git a/include/net/flow.h b/include/net/flow.h index 728e4242f340..de69a0901d9f 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -89,7 +89,8 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, __u32 mark, __u8 tos, __u8 scope, __u8 proto, __u8 flags, __be32 daddr, __be32 saddr, - __be16 dport, __be16 sport) + __be16 dport, __be16 sport, + kuid_t uid) { fl4->flowi4_oif = oif; fl4->flowi4_iif = LOOPBACK_IFINDEX; @@ -99,6 +100,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, fl4->flowi4_proto = proto; fl4->flowi4_flags = flags; fl4->flowi4_secid = 0; + fl4->flowi4_uid = uid; fl4->daddr = daddr; fl4->saddr = saddr; fl4->fl4_dport = dport; diff --git a/include/net/ip.h b/include/net/ip.h index c2425b2a052f..3616dfa9c882 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -169,6 +169,7 @@ struct ip_reply_arg { /* -1 if not needed */ int bound_dev_if; u8 tos; + kuid_t uid; }; #define IP_REPLY_ARG_NOSRCCHECK 1 diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 8769ce827410..e0c9d25d159e 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -137,7 +137,7 @@ extern void rt6_redirect(const struct in6_addr *dest, int on_link); extern void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, - int oif, u32 mark); + int oif, u32 mark, kuid_t uid); extern void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu); diff --git a/include/net/route.h b/include/net/route.h index b1c0d5b564c2..642f6a8a92e4 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -146,7 +146,7 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi flowi4_init_output(fl4, oif, sk ? sk->sk_mark : 0, tos, RT_SCOPE_UNIVERSE, proto, sk ? 
inet_sk_flowi_flags(sk) : 0, - daddr, saddr, dport, sport); + daddr, saddr, dport, sport, sock_net_uid(net, sk)); if (sk) security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); return ip_route_output_flow(net, fl4, sk); @@ -250,7 +250,8 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 flow_flags |= FLOWI_FLAG_CAN_SLEEP; flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, - protocol, flow_flags, dst, src, dport, sport); + protocol, flow_flags, dst, src, dport, sport, + sk->sk_uid); } static inline struct rtable *ip_route_connect(struct flowi4 *fl4, diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index ea63a5767fcf..d7638c7a9786 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -361,6 +361,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) fl4.daddr = daddr; fl4.saddr = rt->rt_spec_dst; fl4.flowi4_mark = mark; + fl4.flowi4_uid = sock_net_uid(net, NULL); fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); fl4.flowi4_proto = IPPROTO_ICMP; security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); @@ -392,6 +393,7 @@ static struct rtable *icmp_route_lookup(struct net *net, param->replyopts.opt.opt.faddr : iph->saddr); fl4->saddr = saddr; fl4->flowi4_mark = mark; + fl4->flowi4_uid = sock_net_uid(net, NULL); fl4->flowi4_tos = RT_TOS(tos); fl4->flowi4_proto = IPPROTO_ICMP; fl4->fl4_icmp_type = type; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index af3dc8e6f684..69b4b6c5669b 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -365,7 +365,8 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr, - ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); + ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport, + sk->sk_uid); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) @@ -398,7 +399,8 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? 
opt->opt.faddr : ireq->rmt_addr, - ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); + ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport, + sk->sk_uid); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 9c02efa10887..a7d77d2870c4 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1506,7 +1506,8 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, RT_SCOPE_UNIVERSE, sk->sk_protocol, ip_reply_arg_flowi_flags(arg), daddr, rt->rt_spec_dst, - tcp_hdr(skb)->source, tcp_hdr(skb)->dest); + tcp_hdr(skb)->source, tcp_hdr(skb)->dest, + arg->uid); security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); if (IS_ERR(rt)) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 8743c3945960..19230311e849 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -786,7 +786,8 @@ int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, - inet_sk_flowi_flags(sk), faddr, saddr, 0, 0); + inet_sk_flowi_flags(sk), faddr, saddr, 0, 0, + sk->sk_uid); security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); rt = ip_route_output_flow(net, &fl4, sk); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index c1b3970caa68..649adb08bd56 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -569,7 +569,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, RT_SCOPE_UNIVERSE, inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP, - daddr, saddr, 0, 0); + daddr, saddr, 0, 0, sk->sk_uid); if (!inet->hdrincl) { err = raw_probe_proto_opt(&fl4, msg); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 8f2dd33e5688..28b9bda14ed0 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -352,7 +352,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), (opt && opt->srr) ? opt->faddr : ireq->rmt_addr, - ireq->loc_addr, th->source, th->dest); + ireq->loc_addr, th->source, th->dest, sk->sk_uid); security_req_classify_flow(req, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); if (IS_ERR(rt)) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 019b35231420..7b6d86044904 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -689,6 +689,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) arg.bound_dev_if = sk->sk_bound_dev_if; arg.tos = ip_hdr(skb)->tos; + arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, &arg, arg.iov[0].iov_len); @@ -708,7 +709,8 @@ release_sk1: outside socket context is ugly, certainly. What can I do? 
*/ -static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, +static void tcp_v4_send_ack(const struct sock *sk, struct sk_buff *skb, + u32 seq, u32 ack, u32 win, u32 ts, int oif, struct tcp_md5sig_key *key, int reply_flags, u8 tos) @@ -723,7 +725,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, ]; } rep; struct ip_reply_arg arg; - struct net *net = dev_net(skb_dst(skb)->dev); + struct net *net = sock_net(sk); memset(&rep.th, 0, sizeof(struct tcphdr)); memset(&arg, 0, sizeof(arg)); @@ -772,6 +774,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, if (oif) arg.bound_dev_if = oif; arg.tos = tos; + arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL); ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, &arg, arg.iov[0].iov_len); @@ -783,7 +786,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) struct inet_timewait_sock *tw = inet_twsk(sk); struct tcp_timewait_sock *tcptw = tcp_twsk(sk); - tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, + tcp_v4_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcptw->tw_ts_recent, tw->tw_bound_dev_if, @@ -798,7 +801,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, struct request_sock *req) { - tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1, + tcp_v4_send_ack(sk, skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent, 0, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 88cbcaf0e9f7..35de75cc0657 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -932,7 +932,8 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk)|FLOWI_FLAG_CAN_SLEEP, - faddr, saddr, dport, inet->inet_sport); + faddr, saddr, dport, inet->inet_sport, + sk->sk_uid); security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 6742f89f1e76..417811ce726e 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -711,6 +711,7 @@ int inet6_sk_rebuild_header(struct sock *sk) fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sk->sk_uid; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); rcu_read_lock(); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index f4cf506603b9..dcc989419a7c 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -620,7 +620,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/%pI6\n", ntohl(ah->spi), &iph->daddr); - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); } diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 2cf0a5f2ad1d..d7619e414c54 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -160,6 +160,7 @@ ipv4_connected: fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sk->sk_uid; if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST)) fl6.flowi6_oif = np->mcast_oif; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 4af058a54cb3..cb4ff846c8f4 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -441,7 +441,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm 
*opt, return; printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%pI6\n", ntohl(esph->spi), &iph->daddr); - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 569e558bdd20..028757bbf15f 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -466,6 +466,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) fl6.flowi6_oif = iif; fl6.fl6_icmp_type = type; fl6.fl6_icmp_code = code; + fl6.flowi6_uid = sock_net_uid(net, NULL); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); @@ -564,6 +565,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) fl6.flowi6_oif = skb->dev->ifindex; fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; fl6.flowi6_mark = mark; + fl6.flowi6_uid = sock_net_uid(net, NULL); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 1c897ee65bac..97e65e5079cb 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -74,6 +74,7 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, fl6.flowi6_mark = inet_rsk(req)->ir_mark; fl6.fl6_dport = inet_rsk(req)->rmt_port; fl6.fl6_sport = inet_rsk(req)->loc_port; + fl6.flowi6_uid = sk->sk_uid; security_req_classify_flow(req, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); @@ -225,6 +226,7 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) fl6.flowi6_mark = sk->sk_mark; fl6.fl6_sport = inet->inet_sport; fl6.fl6_dport = inet->inet_dport; + fl6.flowi6_uid = sk->sk_uid; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); rcu_read_lock(); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 3d3ba9f8da8a..e5ee756d2082 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1038,6 +1038,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) memcpy(&fl6, &t->fl.u.ip6, sizeof (fl6)); fl6.flowi6_proto = IPPROTO_IPIP; + fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + dsfield = ipv4_get_dsfield(iph); if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) @@ -1089,6 +1091,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) memcpy(&fl6, &t->fl.u.ip6, sizeof (fl6)); fl6.flowi6_proto = IPPROTO_IPV6; + fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); dsfield = ipv6_get_dsfield(ipv6h); if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 587aa64e7c02..7e8b89bb0205 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -72,7 +72,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%pI6\n", spi, &iph->daddr); - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); } diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index db31561cc8df..9d2e72be1d49 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -19,6 +19,7 @@ int ip6_route_me_harder(struct sk_buff *skb) struct flowi6 fl6 = { .flowi6_oif = skb->sk ? 
skb->sk->sk_bound_dev_if : 0, .flowi6_mark = skb->mark, + .flowi6_uid = sock_net_uid(net, skb->sk), .daddr = iph->daddr, .saddr = iph->saddr, }; diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 28b96ff0d944..404993143df5 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -162,6 +162,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, fl6.saddr = np->saddr; fl6.daddr = *daddr; fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sk->sk_uid; fl6.fl6_icmp_type = user_icmph.icmp6_type; fl6.fl6_icmp_code = user_icmph.icmp6_code; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 4eccf836415a..4feee61ead52 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -760,6 +760,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sk->sk_uid; if (sin6) { if (addr_len < SIN6_LEN_RFC2133) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 919b880498e6..495cdbb433c5 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1094,7 +1094,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) } void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, - int oif, u32 mark) + int oif, u32 mark, kuid_t uid) { const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; struct dst_entry *dst; @@ -1107,6 +1107,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK; + fl6.flowi6_uid = uid; dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) @@ -1118,7 +1119,7 @@ EXPORT_SYMBOL_GPL(ip6_update_pmtu); void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) { ip6_update_pmtu(skb, sock_net(sk), mtu, - sk->sk_bound_dev_if, sk->sk_mark); + sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid); } EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 921fcdc07004..44fbec5aa1f7 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -246,6 +246,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) fl6.flowi6_mark = inet_rsk(req)->ir_mark; fl6.fl6_dport = inet_rsk(req)->rmt_port; fl6.fl6_sport = inet_sk(sk)->inet_sport; + fl6.flowi6_uid = sk->sk_uid; security_req_classify_flow(req, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 67b8629060c4..deda8776d7a0 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -252,6 +252,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = usin->sin6_port; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sk->sk_uid; opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); final_p = fl6_update_dst(&fl6, opt, &final); @@ -406,6 +407,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sk->sk_uid; security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false); @@ -907,6 +909,7 @@ static void tcp_v6_send_response(struct sock *sk, struct sk_buff *skb, u32 seq, fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); fl6.fl6_dport = t1->dest; fl6.fl6_sport = t1->source; + fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? 
sk : NULL); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); /* Pass a socket to ip6_dst_lookup either it is for RST diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index a2cd0e789ce5..fbff9c89be56 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1094,6 +1094,7 @@ do_udp_sendmsg: fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sk->sk_uid; if (msg->msg_controllen) { opt = &opt_space; From 8dc08ba699bef5632d83c872609f50847a59a0cf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 25 Nov 2012 06:41:45 +0000 Subject: [PATCH 11/12] ipv4: ipmr: various fixes and cleanups 1) ip_mroute_setsockopt() & ip_mroute_getsockopt() should not access/set raw_sk(sk)->ipmr_table before making sure the socket is a raw socket, and protocol is IGMP 2) MRT_INIT should return -EINVAL if optlen != sizeof(int), not -ENOPROTOOPT 3) MRT_ASSERT & MRT_PIM should validate optlen 4) " (v) ? 1 : 0 " can be written as " !!v " Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Kevin F. Haggerty Change-Id: I606493fe572b29f2c6a709833e799e20f2212882 --- net/ipv4/ipmr.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 7b2ada0695f0..e745f6e334e3 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1211,6 +1211,10 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi struct net *net = sock_net(sk); struct mr_table *mrt; + if (sk->sk_type != SOCK_RAW || + inet_sk(sk)->inet_num != IPPROTO_IGMP) + return -EOPNOTSUPP; + mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); if (mrt == NULL) return -ENOENT; @@ -1223,11 +1227,8 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi switch (optname) { case MRT_INIT: - if (sk->sk_type != SOCK_RAW || - inet_sk(sk)->inet_num != IPPROTO_IGMP) - return -EOPNOTSUPP; if (optlen != sizeof(int)) - return -ENOPROTOOPT; + return -EINVAL; rtnl_lock(); if (rtnl_dereference(mrt->mroute_sk)) { @@ -1288,9 +1289,11 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi case MRT_ASSERT: { int v; + if (optlen != sizeof(v)) + return -EINVAL; if (get_user(v, (int __user *)optval)) return -EFAULT; - mrt->mroute_do_assert = (v) ? 1 : 0; + mrt->mroute_do_assert = !!v; return 0; } #ifdef CONFIG_IP_PIMSM @@ -1298,9 +1301,11 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi { int v; + if (optlen != sizeof(v)) + return -EINVAL; if (get_user(v, (int __user *)optval)) return -EFAULT; - v = (v) ? 1 : 0; + v = !!v; rtnl_lock(); ret = 0; @@ -1329,7 +1334,8 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi } else { if (!ipmr_new_table(net, v)) ret = -ENOMEM; - raw_sk(sk)->ipmr_table = v; + else + raw_sk(sk)->ipmr_table = v; } rtnl_unlock(); return ret; @@ -1355,6 +1361,10 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int struct net *net = sock_net(sk); struct mr_table *mrt; + if (sk->sk_type != SOCK_RAW || + inet_sk(sk)->inet_num != IPPROTO_IGMP) + return -EOPNOTSUPP; + mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? 
: RT_TABLE_DEFAULT); if (mrt == NULL) return -ENOENT; From 103a20d70adebec13ae91abd90990385c32bd5e3 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 24 Feb 2017 16:29:06 +0800 Subject: [PATCH 12/12] ipv6: check sk sk_type and protocol early in ip_mroute_set/getsockopt [ Upstream commit 99253eb750fda6a644d5188fb26c43bad8d5a745 ] Commit 5e1859fbcc3c ("ipv4: ipmr: various fixes and cleanups") fixed the issue for ipv4 ipmr: ip_mroute_setsockopt() & ip_mroute_getsockopt() should not access/set raw_sk(sk)->ipmr_table before making sure the socket is a raw socket, and protocol is IGMP The same fix should be done for ipv6 ipmr as well. This patch can fix the panic caused by overwriting the same offset as ipmr_table as in raw_sk(sk) when accessing other type's socket by ip_mroute_setsockopt(). Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman Signed-off-by: Lee Jones Change-Id: I8d48f4611a2f2d0cb7ad5146036f571f12ecb1fc CVE-2017-18509 Signed-off-by: Kevin F. Haggerty --- net/ipv6/ip6mr.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 463f455d68ed..1bac891a3a0a 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1588,6 +1588,10 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns struct net *net = sock_net(sk); struct mr6_table *mrt; + if (sk->sk_type != SOCK_RAW || + inet_sk(sk)->inet_num != IPPROTO_ICMPV6) + return -EOPNOTSUPP; + mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); if (mrt == NULL) return -ENOENT; @@ -1599,9 +1603,6 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns switch (optname) { case MRT6_INIT: - if (sk->sk_type != SOCK_RAW || - inet_sk(sk)->inet_num != IPPROTO_ICMPV6) - return -EOPNOTSUPP; if (optlen < sizeof(int)) return -EINVAL; @@ -1722,6 +1723,10 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, struct net *net = sock_net(sk); struct mr6_table *mrt; + if (sk->sk_type != SOCK_RAW || + inet_sk(sk)->inet_num != IPPROTO_ICMPV6) + return -EOPNOTSUPP; + mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); if (mrt == NULL) return -ENOENT;
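
The effect of the early sk_type/protocol checks in the last two patches can be exercised from userspace. raw_sk(sk) and raw6_sk(sk) are only meaningful casts on SOCK_RAW sockets; on any other socket type the ipmr_table/ip6mr_table offset overlays unrelated fields, which is how the pre-fix code could be made to read or scribble on garbage (CVE-2017-18509). The sketch below is illustrative only and not part of the patch series; it assumes the usual uapi values MRT_INIT == MRT6_INIT == 200 and that the raw sockets are opened with CAP_NET_RAW (e.g. as root). With the patches applied, the non-raw calls fail with EOPNOTSUPP before any multicast routing state is touched.

/*
 * Illustrative sketch (not part of the patch set): MRT_INIT / MRT6_INIT
 * are only dispatched for raw IGMP / ICMPv6 sockets once the early
 * checks are in place.  The option values are defined here by number
 * (both 200 in the uapi headers) to avoid pulling in <linux/mroute*.h>;
 * opening the raw sockets requires CAP_NET_RAW.
 */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>

#ifndef MRT_INIT
#define MRT_INIT  200            /* from <linux/mroute.h>  */
#endif
#ifndef MRT6_INIT
#define MRT6_INIT 200            /* from <linux/mroute6.h> */
#endif

static void try_mrt_init(const char *label, int domain, int type,
                         int protocol, int level, int optname)
{
	int one = 1;
	int fd = socket(domain, type, protocol);

	if (fd < 0) {                 /* raw sockets need CAP_NET_RAW */
		printf("%-24s socket: %s\n", label, strerror(errno));
		return;
	}
	if (setsockopt(fd, level, optname, &one, sizeof(one)) < 0)
		printf("%-24s setsockopt: %s\n", label, strerror(errno));
	else
		printf("%-24s multicast routing enabled\n", label);
	close(fd);
}

int main(void)
{
	/* Accepted: raw IGMP (v4) and raw ICMPv6 (v6) sockets. */
	try_mrt_init("raw IGMP, MRT_INIT:", AF_INET, SOCK_RAW,
		     IPPROTO_IGMP, IPPROTO_IP, MRT_INIT);
	try_mrt_init("raw ICMPv6, MRT6_INIT:", AF_INET6, SOCK_RAW,
		     IPPROTO_ICMPV6, IPPROTO_IPV6, MRT6_INIT);

	/* Rejected with EOPNOTSUPP before raw_sk()/raw6_sk() is read. */
	try_mrt_init("UDPv4, MRT_INIT:", AF_INET, SOCK_DGRAM,
		     IPPROTO_UDP, IPPROTO_IP, MRT_INIT);
	try_mrt_init("UDPv6, MRT6_INIT:", AF_INET6, SOCK_DGRAM,
		     IPPROTO_UDP, IPPROTO_IPV6, MRT6_INIT);
	return 0;
}

Moving the check in front of the ipmr_get_table()/ip6mr_get_table() lookup, rather than keeping it inside the MRT_INIT/MRT6_INIT case, is the whole point: every MRT*/MRT6* option path previously dereferenced the raw-socket union member first, so validating the socket type up front closes the hole for all of them at once.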