 e370a72363
			
		
	
	
	e370a72363
	
	
	
		
			
			unix_stream_sendmsg() currently uses order-2 allocations,
and we had numerous reports this can fail.
The __GFP_REPEAT flag present in sock_alloc_send_pskb() is
not helping.
This patch extends the work done in commit eb6a24816b
("af_unix: reduce high order page allocations") for
datagram sockets.
This opens the possibility of zero copy IO (splice() and
friends)
The trick is to not use skb_pull() anymore in recvmsg() path,
and instead add a @consumed field in UNIXCB() to track amount
of already read payload in the skb.
There is a performance regression for large sends
because of extra page allocations that will be addressed
in a follow-up patch, allowing sock_alloc_send_pskb()
to attempt high order page allocations.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
		
	
			
		
			
				
	
	
		
			81 lines
		
	
	
	
		
			2 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			81 lines
		
	
	
	
		
			2 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
| #ifndef __LINUX_NET_AFUNIX_H
 | |
| #define __LINUX_NET_AFUNIX_H
 | |
| 
 | |
| #include <linux/socket.h>
 | |
| #include <linux/un.h>
 | |
| #include <linux/mutex.h>
 | |
| #include <net/sock.h>
 | |
| 
 | |
| void unix_inflight(struct file *fp);
 | |
| void unix_notinflight(struct file *fp);
 | |
| void unix_gc(void);
 | |
| void wait_for_unix_gc(void);
 | |
| struct sock *unix_get_socket(struct file *filp);
 | |
| struct sock *unix_peer_get(struct sock *);
 | |
| 
 | |
| #define UNIX_HASH_SIZE	256
 | |
| #define UNIX_HASH_BITS	8
 | |
| 
 | |
| extern unsigned int unix_tot_inflight;
 | |
| extern spinlock_t unix_table_lock;
 | |
| extern struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
 | |
| 
 | |
| struct unix_address {
 | |
| 	atomic_t	refcnt;
 | |
| 	int		len;
 | |
| 	unsigned int	hash;
 | |
| 	struct sockaddr_un name[0];
 | |
| };
 | |
| 
 | |
| struct unix_skb_parms {
 | |
| 	struct pid		*pid;		/* Skb credentials	*/
 | |
| 	kuid_t			uid;
 | |
| 	kgid_t			gid;
 | |
| 	struct scm_fp_list	*fp;		/* Passed files		*/
 | |
| #ifdef CONFIG_SECURITY_NETWORK
 | |
| 	u32			secid;		/* Security ID		*/
 | |
| #endif
 | |
| 	u32			consumed;
 | |
| };
 | |
| 
 | |
| #define UNIXCB(skb) 	(*(struct unix_skb_parms *)&((skb)->cb))
 | |
| #define UNIXSID(skb)	(&UNIXCB((skb)).secid)
 | |
| 
 | |
| #define unix_state_lock(s)	spin_lock(&unix_sk(s)->lock)
 | |
| #define unix_state_unlock(s)	spin_unlock(&unix_sk(s)->lock)
 | |
| #define unix_state_lock_nested(s) \
 | |
| 				spin_lock_nested(&unix_sk(s)->lock, \
 | |
| 				SINGLE_DEPTH_NESTING)
 | |
| 
 | |
| /* The AF_UNIX socket */
 | |
| struct unix_sock {
 | |
| 	/* WARNING: sk has to be the first member */
 | |
| 	struct sock		sk;
 | |
| 	struct unix_address     *addr;
 | |
| 	struct path		path;
 | |
| 	struct mutex		readlock;
 | |
| 	struct sock		*peer;
 | |
| 	struct list_head	link;
 | |
| 	atomic_long_t		inflight;
 | |
| 	spinlock_t		lock;
 | |
| 	unsigned char		recursion_level;
 | |
| 	unsigned long		gc_flags;
 | |
| #define UNIX_GC_CANDIDATE	0
 | |
| #define UNIX_GC_MAYBE_CYCLE	1
 | |
| 	struct socket_wq	peer_wq;
 | |
| };
 | |
| #define unix_sk(__sk) ((struct unix_sock *)__sk)
 | |
| 
 | |
| #define peer_wait peer_wq.wait
 | |
| 
 | |
| long unix_inq_len(struct sock *sk);
 | |
| long unix_outq_len(struct sock *sk);
 | |
| 
 | |
| #ifdef CONFIG_SYSCTL
 | |
| int unix_sysctl_register(struct net *net);
 | |
| void unix_sysctl_unregister(struct net *net);
 | |
| #else
 | |
| static inline int unix_sysctl_register(struct net *net) { return 0; }
 | |
| static inline void unix_sysctl_unregister(struct net *net) {}
 | |
| #endif
 | |
| #endif
 |