| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  * INET		An implementation of the TCP/IP protocol suite for the LINUX | 
					
						
							|  |  |  |  *		operating system.  INET is implemented using the  BSD Socket | 
					
						
							|  |  |  |  *		interface as the means of communication with the user level. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  *		Definitions for the TCP protocol. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Version:	@(#)tcp.h	1.0.2	04/28/93 | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Author:	Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  *		This program is free software; you can redistribute it and/or | 
					
						
							|  |  |  |  *		modify it under the terms of the GNU General Public License | 
					
						
							|  |  |  |  *		as published by the Free Software Foundation; either version | 
					
						
							|  |  |  |  *		2 of the License, or (at your option) any later version. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | #ifndef _LINUX_TCP_H
 | 
					
						
							|  |  |  | #define _LINUX_TCP_H
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include <linux/skbuff.h>
 | 
					
						
							| 
									
										
										
										
											2006-06-22 14:28:09 -07:00
										 |  |  | #include <linux/dmaengine.h>
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | #include <net/sock.h>
 | 
					
						
							| 
									
										
										
										
											2005-08-09 20:10:42 -07:00
										 |  |  | #include <net/inet_connection_sock.h>
 | 
					
						
							| 
									
										
										
										
											2005-08-09 20:09:30 -07:00
										 |  |  | #include <net/inet_timewait_sock.h>
 | 
					
						
							| 
									
										
										
										
											2012-10-13 10:46:48 +01:00
										 |  |  | #include <uapi/linux/tcp.h>
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-04-10 21:04:22 -07:00
										 |  |  | static inline struct tcphdr *tcp_hdr(const struct sk_buff *skb) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2007-04-25 18:04:18 -07:00
										 |  |  | 	return (struct tcphdr *)skb_transport_header(skb); | 
					
						
							| 
									
										
										
										
											2007-04-10 21:04:22 -07:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-18 17:43:48 -07:00
										 |  |  | static inline unsigned int tcp_hdrlen(const struct sk_buff *skb) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2007-04-10 21:04:22 -07:00
										 |  |  | 	return tcp_hdr(skb)->doff * 4; | 
					
						
							| 
									
										
										
										
											2007-03-18 17:43:48 -07:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-12-07 14:14:14 +00:00
										 |  |  | static inline struct tcphdr *inner_tcp_hdr(const struct sk_buff *skb) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	return (struct tcphdr *)skb_inner_transport_header(skb); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline unsigned int inner_tcp_hdrlen(const struct sk_buff *skb) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	return inner_tcp_hdr(skb)->doff * 4; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-18 17:43:48 -07:00
										 |  |  | static inline unsigned int tcp_optlen(const struct sk_buff *skb) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2007-04-10 21:04:22 -07:00
										 |  |  | 	return (tcp_hdr(skb)->doff - 5) * 4; | 
					
						
							| 
									
										
										
										
											2007-03-18 17:43:48 -07:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-07-19 06:43:05 +00:00
										 |  |  | /* TCP Fast Open */ | 
					
						
							|  |  |  | #define TCP_FASTOPEN_COOKIE_MIN	4	/* Min Fast Open Cookie size in bytes */
 | 
					
						
							|  |  |  | #define TCP_FASTOPEN_COOKIE_MAX	16	/* Max Fast Open Cookie size in bytes */
 | 
					
						
							| 
									
										
										
										
											2012-08-31 12:29:11 +00:00
										 |  |  | #define TCP_FASTOPEN_COOKIE_SIZE 8	/* the size employed by this impl. */
 | 
					
						
							| 
									
										
										
										
											2012-07-19 06:43:05 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | /* TCP Fast Open Cookie as stored in memory */ | 
					
						
							|  |  |  | struct tcp_fastopen_cookie { | 
					
						
							|  |  |  | 	s8	len; | 
					
						
							|  |  |  | 	u8	val[TCP_FASTOPEN_COOKIE_MAX]; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | /* This defines a selective acknowledgement block. */ | 
					
						
							| 
									
										
										
										
											2006-09-27 18:32:28 -07:00
										 |  |  | struct tcp_sack_block_wire { | 
					
						
							|  |  |  | 	__be32	start_seq; | 
					
						
							|  |  |  | 	__be32	end_seq; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | struct tcp_sack_block { | 
					
						
							| 
									
										
										
										
											2006-11-28 01:12:38 -02:00
										 |  |  | 	u32	start_seq; | 
					
						
							|  |  |  | 	u32	end_seq; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-12-20 13:23:24 +00:00
										 |  |  | /*These are used to set the sack_ok field in struct tcp_options_received */ | 
					
						
							|  |  |  | #define TCP_SACK_SEEN     (1 << 0)   /*1 = peer is SACK capable, */
 | 
					
						
							|  |  |  | #define TCP_FACK_ENABLED  (1 << 1)   /*1 = FACK is enabled locally*/
 | 
					
						
							|  |  |  | #define TCP_DSACK_SEEN    (1 << 2)   /*1 = DSACK was received from peer*/
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | struct tcp_options_received { | 
					
						
							|  |  |  | /*	PAWS/RTTM data	*/ | 
					
						
							|  |  |  | 	long	ts_recent_stamp;/* Time we stored ts_recent (for aging) */ | 
					
						
							| 
									
										
										
										
											2006-11-28 01:12:38 -02:00
										 |  |  | 	u32	ts_recent;	/* Time stamp to echo next		*/ | 
					
						
							|  |  |  | 	u32	rcv_tsval;	/* Time stamp value             	*/ | 
					
						
							|  |  |  | 	u32	rcv_tsecr;	/* Time stamp echo reply        	*/ | 
					
						
							|  |  |  | 	u16 	saw_tstamp : 1,	/* Saw TIMESTAMP on last packet		*/ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 		tstamp_ok : 1,	/* TIMESTAMP seen on SYN packet		*/ | 
					
						
							|  |  |  | 		dsack : 1,	/* D-SACK is scheduled			*/ | 
					
						
							|  |  |  | 		wscale_ok : 1,	/* Wscale seen on SYN packet		*/ | 
					
						
							|  |  |  | 		sack_ok : 4,	/* SACK seen on SYN packet		*/ | 
					
						
							|  |  |  | 		snd_wscale : 4,	/* Window scaling received from sender	*/ | 
					
						
							|  |  |  | 		rcv_wscale : 4;	/* Window scaling to send to receiver	*/ | 
					
						
							| 
									
										
											  
											
												TCPCT part 1d: define TCP cookie option, extend existing struct's
Data structures are carefully composed to require minimal additions.
For example, the struct tcp_options_received cookie_plus variable fits
between existing 16-bit and 8-bit variables, requiring no additional
space (taking alignment into consideration).  There are no additions to
tcp_request_sock, and only 1 pointer in tcp_sock.
This is a significantly revised implementation of an earlier (year-old)
patch that no longer applies cleanly, with permission of the original
author (Adam Langley):
    http://thread.gmane.org/gmane.linux.network/102586
The principle difference is using a TCP option to carry the cookie nonce,
instead of a user configured offset in the data.  This is more flexible and
less subject to user configuration error.  Such a cookie option has been
suggested for many years, and is also useful without SYN data, allowing
several related concepts to use the same extension option.
    "Re: SYN floods (was: does history repeat itself?)", September 9, 1996.
    http://www.merit.net/mail.archives/nanog/1996-09/msg00235.html
    "Re: what a new TCP header might look like", May 12, 1998.
    ftp://ftp.isi.edu/end2end/end2end-interest-1998.mail
These functions will also be used in subsequent patches that implement
additional features.
Requires:
   TCPCT part 1a: add request_values parameter for sending SYNACK
   TCPCT part 1b: generate Responder Cookie secret
   TCPCT part 1c: sysctl_tcp_cookie_size, socket option TCP_COOKIE_TRANSACTIONS
Signed-off-by: William.Allen.Simpson@gmail.com
Signed-off-by: David S. Miller <davem@davemloft.net>
											
										 
											2009-12-02 18:17:05 +00:00
										 |  |  | 	u8	cookie_plus:6,	/* bytes in authenticator/cookie option	*/ | 
					
						
							|  |  |  | 		cookie_out_never:1, | 
					
						
							|  |  |  | 		cookie_in_always:1; | 
					
						
							| 
									
										
										
										
											2006-11-28 01:12:38 -02:00
										 |  |  | 	u8	num_sacks;	/* Number of SACK blocks		*/ | 
					
						
							| 
									
										
											  
											
												TCPCT part 1d: define TCP cookie option, extend existing struct's
Data structures are carefully composed to require minimal additions.
For example, the struct tcp_options_received cookie_plus variable fits
between existing 16-bit and 8-bit variables, requiring no additional
space (taking alignment into consideration).  There are no additions to
tcp_request_sock, and only 1 pointer in tcp_sock.
This is a significantly revised implementation of an earlier (year-old)
patch that no longer applies cleanly, with permission of the original
author (Adam Langley):
    http://thread.gmane.org/gmane.linux.network/102586
The principle difference is using a TCP option to carry the cookie nonce,
instead of a user configured offset in the data.  This is more flexible and
less subject to user configuration error.  Such a cookie option has been
suggested for many years, and is also useful without SYN data, allowing
several related concepts to use the same extension option.
    "Re: SYN floods (was: does history repeat itself?)", September 9, 1996.
    http://www.merit.net/mail.archives/nanog/1996-09/msg00235.html
    "Re: what a new TCP header might look like", May 12, 1998.
    ftp://ftp.isi.edu/end2end/end2end-interest-1998.mail
These functions will also be used in subsequent patches that implement
additional features.
Requires:
   TCPCT part 1a: add request_values parameter for sending SYNACK
   TCPCT part 1b: generate Responder Cookie secret
   TCPCT part 1c: sysctl_tcp_cookie_size, socket option TCP_COOKIE_TRANSACTIONS
Signed-off-by: William.Allen.Simpson@gmail.com
Signed-off-by: David S. Miller <davem@davemloft.net>
											
										 
											2009-12-02 18:17:05 +00:00
										 |  |  | 	u16	user_mss;	/* mss requested by user in ioctl	*/ | 
					
						
							| 
									
										
										
										
											2006-11-28 01:12:38 -02:00
										 |  |  | 	u16	mss_clamp;	/* Maximal mss, negotiated at connection setup */ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-12-02 18:14:19 +00:00
										 |  |  | static inline void tcp_clear_options(struct tcp_options_received *rx_opt) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
											  
											
												TCPCT part 1d: define TCP cookie option, extend existing struct's
Data structures are carefully composed to require minimal additions.
For example, the struct tcp_options_received cookie_plus variable fits
between existing 16-bit and 8-bit variables, requiring no additional
space (taking alignment into consideration).  There are no additions to
tcp_request_sock, and only 1 pointer in tcp_sock.
This is a significantly revised implementation of an earlier (year-old)
patch that no longer applies cleanly, with permission of the original
author (Adam Langley):
    http://thread.gmane.org/gmane.linux.network/102586
The principle difference is using a TCP option to carry the cookie nonce,
instead of a user configured offset in the data.  This is more flexible and
less subject to user configuration error.  Such a cookie option has been
suggested for many years, and is also useful without SYN data, allowing
several related concepts to use the same extension option.
    "Re: SYN floods (was: does history repeat itself?)", September 9, 1996.
    http://www.merit.net/mail.archives/nanog/1996-09/msg00235.html
    "Re: what a new TCP header might look like", May 12, 1998.
    ftp://ftp.isi.edu/end2end/end2end-interest-1998.mail
These functions will also be used in subsequent patches that implement
additional features.
Requires:
   TCPCT part 1a: add request_values parameter for sending SYNACK
   TCPCT part 1b: generate Responder Cookie secret
   TCPCT part 1c: sysctl_tcp_cookie_size, socket option TCP_COOKIE_TRANSACTIONS
Signed-off-by: William.Allen.Simpson@gmail.com
Signed-off-by: David S. Miller <davem@davemloft.net>
											
										 
											2009-12-02 18:17:05 +00:00
										 |  |  | 	rx_opt->tstamp_ok = rx_opt->sack_ok = 0; | 
					
						
							|  |  |  | 	rx_opt->wscale_ok = rx_opt->snd_wscale = 0; | 
					
						
							|  |  |  | 	rx_opt->cookie_plus = 0; | 
					
						
							| 
									
										
										
										
											2009-12-02 18:14:19 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-07-19 00:07:02 -07:00
										 |  |  | /* This is the max number of SACKS that we'll generate and process. It's safe
 | 
					
						
							| 
									
										
											  
											
												TCPCT part 1d: define TCP cookie option, extend existing struct's
Data structures are carefully composed to require minimal additions.
For example, the struct tcp_options_received cookie_plus variable fits
between existing 16-bit and 8-bit variables, requiring no additional
space (taking alignment into consideration).  There are no additions to
tcp_request_sock, and only 1 pointer in tcp_sock.
This is a significantly revised implementation of an earlier (year-old)
patch that no longer applies cleanly, with permission of the original
author (Adam Langley):
    http://thread.gmane.org/gmane.linux.network/102586
The principle difference is using a TCP option to carry the cookie nonce,
instead of a user configured offset in the data.  This is more flexible and
less subject to user configuration error.  Such a cookie option has been
suggested for many years, and is also useful without SYN data, allowing
several related concepts to use the same extension option.
    "Re: SYN floods (was: does history repeat itself?)", September 9, 1996.
    http://www.merit.net/mail.archives/nanog/1996-09/msg00235.html
    "Re: what a new TCP header might look like", May 12, 1998.
    ftp://ftp.isi.edu/end2end/end2end-interest-1998.mail
These functions will also be used in subsequent patches that implement
additional features.
Requires:
   TCPCT part 1a: add request_values parameter for sending SYNACK
   TCPCT part 1b: generate Responder Cookie secret
   TCPCT part 1c: sysctl_tcp_cookie_size, socket option TCP_COOKIE_TRANSACTIONS
Signed-off-by: William.Allen.Simpson@gmail.com
Signed-off-by: David S. Miller <davem@davemloft.net>
											
										 
											2009-12-02 18:17:05 +00:00
										 |  |  |  * to increase this, although since: | 
					
						
							| 
									
										
										
										
											2008-07-19 00:07:02 -07:00
										 |  |  |  *   size = TCPOLEN_SACK_BASE_ALIGNED (4) + n * TCPOLEN_SACK_PERBLOCK (8) | 
					
						
							|  |  |  |  * only four options will fit in a standard TCP header */ | 
					
						
							|  |  |  | #define TCP_NUM_SACKS 4
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												TCPCT part 1d: define TCP cookie option, extend existing struct's
Data structures are carefully composed to require minimal additions.
For example, the struct tcp_options_received cookie_plus variable fits
between existing 16-bit and 8-bit variables, requiring no additional
space (taking alignment into consideration).  There are no additions to
tcp_request_sock, and only 1 pointer in tcp_sock.
This is a significantly revised implementation of an earlier (year-old)
patch that no longer applies cleanly, with permission of the original
author (Adam Langley):
    http://thread.gmane.org/gmane.linux.network/102586
The principal difference is using a TCP option to carry the cookie nonce,
instead of a user configured offset in the data.  This is more flexible and
less subject to user configuration error.  Such a cookie option has been
suggested for many years, and is also useful without SYN data, allowing
several related concepts to use the same extension option.
    "Re: SYN floods (was: does history repeat itself?)", September 9, 1996.
    http://www.merit.net/mail.archives/nanog/1996-09/msg00235.html
    "Re: what a new TCP header might look like", May 12, 1998.
    ftp://ftp.isi.edu/end2end/end2end-interest-1998.mail
These functions will also be used in subsequent patches that implement
additional features.
Requires:
   TCPCT part 1a: add request_values parameter for sending SYNACK
   TCPCT part 1b: generate Responder Cookie secret
   TCPCT part 1c: sysctl_tcp_cookie_size, socket option TCP_COOKIE_TRANSACTIONS
Signed-off-by: William.Allen.Simpson@gmail.com
Signed-off-by: David S. Miller <davem@davemloft.net>
											
										 
											2009-12-02 18:17:05 +00:00
										 |  |  | struct tcp_cookie_values; | 
					
						
							|  |  |  | struct tcp_request_sock_ops; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												[NET] Generalise TCP's struct open_request minisock infrastructure
Kept this first changeset minimal, without changing existing names to
ease peer review.
Basically tcp_openreq_alloc now receives the or_calltable, which in turn
has two new members:
->slab, that replaces tcp_openreq_cachep
->obj_size, to inform the size of the openreq descendant for
  a specific protocol
The protocol specific fields in struct open_request were moved to a
class hierarchy, with the things that are common to all connection
oriented PF_INET protocols in struct inet_request_sock, the TCP ones
in tcp_request_sock, that is an inet_request_sock, that is an
open_request.
I.e. this uses the same approach used for the struct sock class
hierarchy, with sk_prot indicating if the protocol wants to use the
open_request infrastructure by filling in sk_prot->rsk_prot with an
or_calltable.
Results? Performance is improved and TCP v4 now uses only 64 bytes per
open request minisock, down from 96 without this patch :-)
Next changeset will rename some of the structs, fields and functions
mentioned above, struct or_calltable is way unclear, better name it
struct request_sock_ops, s/struct open_request/struct request_sock/g,
etc.
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
											
										 
											2005-06-18 22:46:52 -07:00
										 |  |  | struct tcp_request_sock { | 
					
						
							| 
									
										
										
										
											2006-11-14 19:07:45 -08:00
										 |  |  | 	struct inet_request_sock 	req; | 
					
						
							|  |  |  | #ifdef CONFIG_TCP_MD5SIG
 | 
					
						
							|  |  |  | 	/* Only used by TCP MD5 Signature so far. */ | 
					
						
							| 
									
										
										
										
											2009-09-01 19:25:03 +00:00
										 |  |  | 	const struct tcp_request_sock_ops *af_specific; | 
					
						
							| 
									
										
										
										
											2006-11-14 19:07:45 -08:00
										 |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2012-08-31 12:29:11 +00:00
										 |  |  | 	struct sock			*listener; /* needed for TFO */ | 
					
						
							| 
									
										
											  
											
												TCPCT part 1d: define TCP cookie option, extend existing struct's
Data structures are carefully composed to require minimal additions.
For example, the struct tcp_options_received cookie_plus variable fits
between existing 16-bit and 8-bit variables, requiring no additional
space (taking alignment into consideration).  There are no additions to
tcp_request_sock, and only 1 pointer in tcp_sock.
This is a significantly revised implementation of an earlier (year-old)
patch that no longer applies cleanly, with permission of the original
author (Adam Langley):
    http://thread.gmane.org/gmane.linux.network/102586
The principle difference is using a TCP option to carry the cookie nonce,
instead of a user configured offset in the data.  This is more flexible and
less subject to user configuration error.  Such a cookie option has been
suggested for many years, and is also useful without SYN data, allowing
several related concepts to use the same extension option.
    "Re: SYN floods (was: does history repeat itself?)", September 9, 1996.
    http://www.merit.net/mail.archives/nanog/1996-09/msg00235.html
    "Re: what a new TCP header might look like", May 12, 1998.
    ftp://ftp.isi.edu/end2end/end2end-interest-1998.mail
These functions will also be used in subsequent patches that implement
additional features.
Requires:
   TCPCT part 1a: add request_values parameter for sending SYNACK
   TCPCT part 1b: generate Responder Cookie secret
   TCPCT part 1c: sysctl_tcp_cookie_size, socket option TCP_COOKIE_TRANSACTIONS
Signed-off-by: William.Allen.Simpson@gmail.com
Signed-off-by: David S. Miller <davem@davemloft.net>
											
										 
											2009-12-02 18:17:05 +00:00
										 |  |  | 	u32				rcv_isn; | 
					
						
							|  |  |  | 	u32				snt_isn; | 
					
						
							| 
									
										
										
										
											2011-06-08 11:08:38 +00:00
										 |  |  | 	u32				snt_synack; /* synack sent time */ | 
					
						
							| 
									
										
										
										
											2012-08-31 12:29:11 +00:00
										 |  |  | 	u32				rcv_nxt; /* the ack # by SYNACK. For
 | 
					
						
							|  |  |  | 						  * FastOpen it's the seq# | 
					
						
							|  |  |  | 						  * after data-in-SYN. | 
					
						
							|  |  |  | 						  */ | 
					
						
							| 
									
										
											  
											
												[NET] Generalise TCP's struct open_request minisock infrastructure
Kept this first changeset minimal, without changing existing names to
ease peer review.
Basicaly tcp_openreq_alloc now receives the or_calltable, that in turn
has two new members:
->slab, that replaces tcp_openreq_cachep
->obj_size, to inform the size of the openreq descendant for
  a specific protocol
The protocol specific fields in struct open_request were moved to a
class hierarchy, with the things that are common to all connection
oriented PF_INET protocols in struct inet_request_sock, the TCP ones
in tcp_request_sock, that is an inet_request_sock, that is an
open_request.
I.e. this uses the same approach used for the struct sock class
hierarchy, with sk_prot indicating if the protocol wants to use the
open_request infrastructure by filling in sk_prot->rsk_prot with an
or_calltable.
Results? Performance is improved and TCP v4 now uses only 64 bytes per
open request minisock, down from 96 without this patch :-)
Next changeset will rename some of the structs, fields and functions
mentioned above, struct or_calltable is way unclear, better name it
struct request_sock_ops, s/struct open_request/struct request_sock/g,
etc.
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
											
										 
											2005-06-18 22:46:52 -07:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-06-18 22:47:21 -07:00
										 |  |  | static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req) | 
					
						
							| 
									
										
											  
											
												[NET] Generalise TCP's struct open_request minisock infrastructure
Kept this first changeset minimal, without changing existing names to
ease peer review.
Basicaly tcp_openreq_alloc now receives the or_calltable, that in turn
has two new members:
->slab, that replaces tcp_openreq_cachep
->obj_size, to inform the size of the openreq descendant for
  a specific protocol
The protocol specific fields in struct open_request were moved to a
class hierarchy, with the things that are common to all connection
oriented PF_INET protocols in struct inet_request_sock, the TCP ones
in tcp_request_sock, that is an inet_request_sock, that is an
open_request.
I.e. this uses the same approach used for the struct sock class
hierarchy, with sk_prot indicating if the protocol wants to use the
open_request infrastructure by filling in sk_prot->rsk_prot with an
or_calltable.
Results? Performance is improved and TCP v4 now uses only 64 bytes per
open request minisock, down from 96 without this patch :-)
Next changeset will rename some of the structs, fields and functions
mentioned above, struct or_calltable is way unclear, better name it
struct request_sock_ops, s/struct open_request/struct request_sock/g,
etc.
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
											
										 
											2005-06-18 22:46:52 -07:00
										 |  |  | { | 
					
						
							|  |  |  | 	return (struct tcp_request_sock *)req; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | struct tcp_sock { | 
					
						
							| 
									
										
										
										
											2005-08-09 20:10:42 -07:00
										 |  |  | 	/* inet_connection_sock has to be the first member of tcp_sock */ | 
					
						
							|  |  |  | 	struct inet_connection_sock	inet_conn; | 
					
						
							| 
									
										
										
										
											2006-11-28 00:48:32 -02:00
										 |  |  | 	u16	tcp_header_len;	/* Bytes of tcp header to send		*/ | 
					
						
							| 
									
										
										
										
											2009-03-14 22:45:16 +00:00
										 |  |  | 	u16	xmit_size_goal_segs; /* Goal for segmenting output packets */ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  *	Header prediction flags | 
					
						
							|  |  |  |  *	0x5?10 << 16 + snd_wnd in net byte order | 
					
						
							|  |  |  |  */ | 
					
						
							| 
									
										
										
										
											2006-09-27 18:32:46 -07:00
										 |  |  | 	__be32	pred_flags; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  *	RFC793 variables by their proper names. This means you can | 
					
						
							|  |  |  |  *	read the code and the spec side by side (and laugh ...) | 
					
						
							|  |  |  |  *	See RFC793 and RFC1122. The RFC writes these in capitals. | 
					
						
							|  |  |  |  */ | 
					
						
							| 
									
										
										
										
											2006-11-28 01:12:38 -02:00
										 |  |  |  	u32	rcv_nxt;	/* What we want to receive next 	*/ | 
					
						
							| 
									
										
										
										
											2007-02-22 03:20:44 -08:00
										 |  |  | 	u32	copied_seq;	/* Head of yet unread data		*/ | 
					
						
							|  |  |  | 	u32	rcv_wup;	/* rcv_nxt on last window update sent	*/ | 
					
						
							| 
									
										
										
										
											2006-11-28 01:12:38 -02:00
										 |  |  |  	u32	snd_nxt;	/* Next sequence we send		*/ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-11-28 01:12:38 -02:00
										 |  |  |  	u32	snd_una;	/* First byte we want an ack for	*/ | 
					
						
							|  |  |  |  	u32	snd_sml;	/* Last byte of the most recently transmitted small packet */ | 
					
						
							|  |  |  | 	u32	rcv_tstamp;	/* timestamp of last received ACK (for keepalives) */ | 
					
						
							|  |  |  | 	u32	lsndtime;	/* timestamp of last sent data packet (for restart window) */ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-02-11 05:50:17 +00:00
										 |  |  | 	u32	tsoffset;	/* timestamp offset */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												tcp: TCP Small Queues
This introduces TSQ (TCP Small Queues)
TSQ goal is to reduce number of TCP packets in xmit queues (qdisc &
device queues), to reduce RTT and cwnd bias, part of the bufferbloat
problem.
sk->sk_wmem_alloc not allowed to grow above a given limit,
allowing no more than ~128KB [1] per tcp socket in qdisc/dev layers at a
given time.
TSO packets are sized/capped to half the limit, so that we have two
TSO packets in flight, allowing better bandwidth use.
As a side effect, setting the limit to 40000 automatically reduces the
standard gso max limit (65536) to 40000/2 : It can help to reduce
latencies of high prio packets, having smaller TSO packets.
This means we divert sock_wfree() to a tcp_wfree() handler, to
queue/send following frames when skb_orphan() [2] is called for the
already queued skbs.
Results on my dev machines (tg3/ixgbe nics) are really impressive,
using standard pfifo_fast, and with or without TSO/GSO.
Without reduction of nominal bandwidth, we have reduction of buffering
per bulk sender :
< 1ms on Gbit (instead of 50ms with TSO)
< 8ms on 100Mbit (instead of 132 ms)
I no longer have 4 MBytes backlogged in qdisc by a single netperf
session, and both side socket autotuning no longer use 4 Mbytes.
As skb destructor cannot restart xmit itself ( as qdisc lock might be
taken at this point ), we delegate the work to a tasklet. We use one
tasklet per cpu for performance reasons.
If tasklet finds a socket owned by the user, it sets TSQ_OWNED flag.
This flag is tested in a new protocol method called from release_sock(),
to eventually send new segments.
[1] New /proc/sys/net/ipv4/tcp_limit_output_bytes tunable
[2] skb_orphan() is usually called at TX completion time,
  but some drivers call it in their start_xmit() handler.
  These drivers should at least use BQL, or else a single TCP
  session can still fill the whole NIC TX ring, since TSQ will
  have no effect.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Dave Taht <dave.taht@bufferbloat.net>
Cc: Tom Herbert <therbert@google.com>
Cc: Matt Mathis <mattmathis@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Nandita Dukkipati <nanditad@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
											
										 
											2012-07-11 05:50:31 +00:00
										 |  |  | 	struct list_head tsq_node; /* anchor in tsq_tasklet.head list */ | 
					
						
							|  |  |  | 	unsigned long	tsq_flags; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	/* Data for direct copy to user */ | 
					
						
							|  |  |  | 	struct { | 
					
						
							|  |  |  | 		struct sk_buff_head	prequeue; | 
					
						
							|  |  |  | 		struct task_struct	*task; | 
					
						
							|  |  |  | 		struct iovec		*iov; | 
					
						
							|  |  |  | 		int			memory; | 
					
						
							|  |  |  | 		int			len; | 
					
						
							| 
									
										
										
										
											2006-05-23 17:55:33 -07:00
										 |  |  | #ifdef CONFIG_NET_DMA
 | 
					
						
							|  |  |  | 		/* members for async copy */ | 
					
						
							|  |  |  | 		struct dma_chan		*dma_chan; | 
					
						
							|  |  |  | 		int			wakeup; | 
					
						
							|  |  |  | 		struct dma_pinned_list	*pinned_list; | 
					
						
							|  |  |  | 		dma_cookie_t		dma_cookie; | 
					
						
							|  |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	} ucopy; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-11-28 01:12:38 -02:00
										 |  |  | 	u32	snd_wl1;	/* Sequence for window update		*/ | 
					
						
							|  |  |  | 	u32	snd_wnd;	/* The window we expect to receive	*/ | 
					
						
							|  |  |  | 	u32	max_window;	/* Maximal window ever seen from peer	*/ | 
					
						
							|  |  |  | 	u32	mss_cache;	/* Cached effective mss, not including SACKS */ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-11-28 01:12:38 -02:00
										 |  |  | 	u32	window_clamp;	/* Maximal window to advertise		*/ | 
					
						
							|  |  |  | 	u32	rcv_ssthresh;	/* Current window clamp			*/ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-11-28 01:12:38 -02:00
										 |  |  | 	u32	frto_highmark;	/* snd_nxt when RTO occurred */ | 
					
						
							| 
									
										
											  
											
												tcp: Reorganize tcp_sock to fill 64-bit holes & improve locality
I tried to group recovery related fields nearby (non-CA_Open related
variables, to be more accurate) so that one to three cachelines would
not be necessary in CA_Open. These are now contiguously deployed:
  struct sk_buff_head        out_of_order_queue;   /*  1968    80 */
  /* --- cacheline 32 boundary (2048 bytes) --- */
  struct tcp_sack_block      duplicate_sack[1];    /*  2048     8 */
  struct tcp_sack_block      selective_acks[4];    /*  2056    32 */
  struct tcp_sack_block      recv_sack_cache[4];   /*  2088    32 */
  /* --- cacheline 33 boundary (2112 bytes) was 8 bytes ago --- */
  struct sk_buff *           highest_sack;         /*  2120     8 */
  int                        lost_cnt_hint;        /*  2128     4 */
  int                        retransmit_cnt_hint;  /*  2132     4 */
  u32                        lost_retrans_low;     /*  2136     4 */
  u8                         reordering;           /*  2140     1 */
  u8                         keepalive_probes;     /*  2141     1 */
  /* XXX 2 bytes hole, try to pack */
  u32                        prior_ssthresh;       /*  2144     4 */
  u32                        high_seq;             /*  2148     4 */
  u32                        retrans_stamp;        /*  2152     4 */
  u32                        undo_marker;          /*  2156     4 */
  int                        undo_retrans;         /*  2160     4 */
  u32                        total_retrans;        /*  2164     4 */
...and they're then followed by URG slowpath & keepalive related
variables.
Head of the out_of_order_queue always needed for empty checks, if
that's empty (and TCP is in CA_Open), following ~200 bytes (in 64-bit)
shouldn't be necessary for anything. If only OFO queue exists but TCP
is in CA_Open, selective_acks (and possibly duplicate_sack) are
necessary besides the out_of_order_queue but the rest of the block
again shouldn't be (ie., the other direction had losses).
As the cacheline boundaries depend on many factors in the preceding
stuff, trying to align considering them doesn't make too much sense.
Commented one ordering hazard.
There are a number of low utilized u8/16s that could be combined to get 2
bytes less in total so that the hole could be made to vanish (includes
at least ecn_flags, urg_data, urg_mode, frto_counter, nonagle).
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Acked-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
											
										 
											2008-05-29 03:25:23 -07:00
										 |  |  | 	u16	advmss;		/* Advertised MSS			*/ | 
					
						
							| 
									
										
										
										
											2006-11-28 01:12:38 -02:00
										 |  |  | 	u8	frto_counter;	/* Number of new acks after RTO */ | 
					
						
							| 
									
										
										
										
											2010-02-18 02:47:01 +00:00
										 |  |  | 	u8	nonagle     : 4,/* Disable Nagle algorithm?             */ | 
					
						
							|  |  |  | 		thin_lto    : 1,/* Use linear timeouts for thin streams */ | 
					
						
							| 
									
										
										
										
											2010-02-18 04:48:19 +00:00
										 |  |  | 		thin_dupack : 1,/* Fast retransmit on first dupack      */ | 
					
						
							| 
									
										
										
										
											2012-04-19 03:40:39 +00:00
										 |  |  | 		repair      : 1, | 
					
						
							|  |  |  | 		unused      : 1; | 
					
						
							|  |  |  | 	u8	repair_queue; | 
					
						
							| 
									
										
										
										
											2012-05-02 13:30:04 +00:00
										 |  |  | 	u8	do_early_retrans:1,/* Enable RFC5827 early-retransmit  */ | 
					
						
							| 
									
										
										
										
											2012-07-19 06:43:07 +00:00
										 |  |  | 		early_retrans_delayed:1, /* Delayed ER timer installed */ | 
					
						
							| 
									
										
										
										
											2012-07-19 06:43:11 +00:00
										 |  |  | 		syn_data:1,	/* SYN includes data */ | 
					
						
							| 
									
										
										
										
											2012-10-19 15:14:44 +00:00
										 |  |  | 		syn_fastopen:1,	/* SYN includes Fast Open option */ | 
					
						
							|  |  |  | 		syn_data_acked:1;/* data in SYN is acked by SYN-ACK */ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | /* RTT measurement */ | 
					
						
							| 
									
										
										
										
											2006-11-28 01:12:38 -02:00
										 |  |  | 	u32	srtt;		/* smoothed round trip time << 3	*/ | 
					
						
							|  |  |  | 	u32	mdev;		/* medium deviation			*/ | 
					
						
							|  |  |  | 	u32	mdev_max;	/* maximal mdev for the last rtt period	*/ | 
					
						
							|  |  |  | 	u32	rttvar;		/* smoothed mdev_max			*/ | 
					
						
							|  |  |  | 	u32	rtt_seq;	/* sequence number to update rttvar	*/ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	u32	packets_out;	/* Packets which are "in flight"	*/ | 
					
						
							|  |  |  | 	u32	retrans_out;	/* Retransmitted packets out		*/ | 
					
						
							| 
									
										
											  
											
												tcp: Reorganize tcp_sock to fill 64-bit holes & improve locality
I tried to group recovery related fields nearby (non-CA_Open related
variables, to be more accurate) so that one to three cachelines would
not be necessary in CA_Open. These are now contiguously deployed:
  struct sk_buff_head        out_of_order_queue;   /*  1968    80 */
  /* --- cacheline 32 boundary (2048 bytes) --- */
  struct tcp_sack_block      duplicate_sack[1];    /*  2048     8 */
  struct tcp_sack_block      selective_acks[4];    /*  2056    32 */
  struct tcp_sack_block      recv_sack_cache[4];   /*  2088    32 */
  /* --- cacheline 33 boundary (2112 bytes) was 8 bytes ago --- */
  struct sk_buff *           highest_sack;         /*  2120     8 */
  int                        lost_cnt_hint;        /*  2128     4 */
  int                        retransmit_cnt_hint;  /*  2132     4 */
  u32                        lost_retrans_low;     /*  2136     4 */
  u8                         reordering;           /*  2140     1 */
  u8                         keepalive_probes;     /*  2141     1 */
  /* XXX 2 bytes hole, try to pack */
  u32                        prior_ssthresh;       /*  2144     4 */
  u32                        high_seq;             /*  2148     4 */
  u32                        retrans_stamp;        /*  2152     4 */
  u32                        undo_marker;          /*  2156     4 */
  int                        undo_retrans;         /*  2160     4 */
  u32                        total_retrans;        /*  2164     4 */
...and they're then followed by URG slowpath & keepalive related
variables.
Head of the out_of_order_queue always needed for empty checks, if
that's empty (and TCP is in CA_Open), following ~200 bytes (in 64-bit)
shouldn't be necessary for anything. If only OFO queue exists but TCP
is in CA_Open, selective_acks (and possibly duplicate_sack) are
necessary besides the out_of_order_queue but the rest of the block
again shouldn't be (ie., the other direction had losses).
As the cacheline boundaries depend on many factors in the preceding
stuff, trying to align considering them doesn't make too much sense.
Commented one ordering hazard.
There are a number of low utilized u8/16s that could be combined to get 2
bytes less in total so that the hole could be made to vanish (includes
at least ecn_flags, urg_data, urg_mode, frto_counter, nonagle).
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Acked-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
											
										 
											2008-05-29 03:25:23 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	u16	urg_data;	/* Saved octet of OOB data and control flags */ | 
					
						
							|  |  |  | 	u8	ecn_flags;	/* ECN status bits.			*/ | 
					
						
							| 
									
										
										
										
											2008-10-07 14:43:06 -07:00
										 |  |  | 	u8	reordering;	/* Packet reordering metric.		*/ | 
					
						
							|  |  |  | 	u32	snd_up;		/* Urgent pointer		*/ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	u8	keepalive_probes; /* num of allowed keep alive probes	*/ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  *      Options received (usually on last packet, some only on SYN packets). | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 	struct tcp_options_received rx_opt; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  *	Slow start and congestion control (see also Nagle, and Karn & Partridge) | 
					
						
							|  |  |  |  */ | 
					
						
							| 
									
										
										
										
											2006-11-28 01:12:38 -02:00
										 |  |  |  	u32	snd_ssthresh;	/* Slow start size threshold		*/ | 
					
						
							|  |  |  |  	u32	snd_cwnd;	/* Sending congestion window		*/ | 
					
						
							| 
									
										
										
										
											2007-10-15 12:59:43 -07:00
										 |  |  | 	u32	snd_cwnd_cnt;	/* Linear increase counter		*/ | 
					
						
							| 
									
										
										
										
											2007-02-22 22:52:59 -08:00
										 |  |  | 	u32	snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */ | 
					
						
							| 
									
										
										
										
											2006-11-28 01:12:38 -02:00
										 |  |  | 	u32	snd_cwnd_used; | 
					
						
							|  |  |  | 	u32	snd_cwnd_stamp; | 
					
						
							| 
									
										
										
										
											2011-08-21 20:21:57 +00:00
										 |  |  | 	u32	prior_cwnd;	/* Congestion window at start of Recovery. */ | 
					
						
							|  |  |  | 	u32	prr_delivered;	/* Number of newly delivered packets to
 | 
					
						
							|  |  |  | 				 * receiver in Recovery. */ | 
					
						
							|  |  |  | 	u32	prr_out;	/* Total number of pkts sent during Recovery. */ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-11-28 01:12:38 -02:00
										 |  |  |  	u32	rcv_wnd;	/* Current receiver window		*/ | 
					
						
							|  |  |  | 	u32	write_seq;	/* Tail(+1) of data held in tcp send buffer */ | 
					
						
							|  |  |  | 	u32	pushed_seq;	/* Last pushed seq, required to talk to windows */ | 
					
						
							| 
									
										
											  
											
												tcp: Reorganize tcp_sock to fill 64-bit holes & improve locality
I tried to group recovery related fields nearby (non-CA_Open related
variables, to be more accurate) so that one to three cachelines would
not be necessary in CA_Open. These are now contiguously deployed:
  struct sk_buff_head        out_of_order_queue;   /*  1968    80 */
  /* --- cacheline 32 boundary (2048 bytes) --- */
  struct tcp_sack_block      duplicate_sack[1];    /*  2048     8 */
  struct tcp_sack_block      selective_acks[4];    /*  2056    32 */
  struct tcp_sack_block      recv_sack_cache[4];   /*  2088    32 */
  /* --- cacheline 33 boundary (2112 bytes) was 8 bytes ago --- */
  struct sk_buff *           highest_sack;         /*  2120     8 */
  int                        lost_cnt_hint;        /*  2128     4 */
  int                        retransmit_cnt_hint;  /*  2132     4 */
  u32                        lost_retrans_low;     /*  2136     4 */
  u8                         reordering;           /*  2140     1 */
  u8                         keepalive_probes;     /*  2141     1 */
  /* XXX 2 bytes hole, try to pack */
  u32                        prior_ssthresh;       /*  2144     4 */
  u32                        high_seq;             /*  2148     4 */
  u32                        retrans_stamp;        /*  2152     4 */
  u32                        undo_marker;          /*  2156     4 */
  int                        undo_retrans;         /*  2160     4 */
  u32                        total_retrans;        /*  2164     4 */
...and they're then followed by URG slowpath & keepalive related
variables.
Head of the out_of_order_queue always needed for empty checks, if
that's empty (and TCP is in CA_Open), following ~200 bytes (in 64-bit)
shouldn't be necessary for anything. If only OFO queue exists but TCP
is in CA_Open, selective_acks (and possibly duplicate_sack) are
necessary besides the out_of_order_queue but the rest of the block
again shouldn't be (ie., the other direction had losses).
As the cacheline boundaries depend on many factors in the preceding
stuff, trying to align considering them doesn't make too much sense.
Commented one ordering hazard.
There are a number of low utilized u8/16s that could be combined to get 2
bytes less in total so that the hole could be made to vanish (includes
at least ecn_flags, urg_data, urg_mode, frto_counter, nonagle).
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Acked-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
											
										 
											2008-05-29 03:25:23 -07:00
										 |  |  | 	u32	lost_out;	/* Lost packets			*/ | 
					
						
							|  |  |  | 	u32	sacked_out;	/* SACK'd packets			*/ | 
					
						
							|  |  |  | 	u32	fackets_out;	/* FACK'd packets			*/ | 
					
						
							|  |  |  | 	u32	tso_deferred; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
											  
											
												tcp: Reorganize tcp_sock to fill 64-bit holes & improve locality
I tried to group recovery related fields nearby (non-CA_Open related
variables, to be more accurate) so that one to three cachelines would
not be necessary in CA_Open. These are now contiguously deployed:
  struct sk_buff_head        out_of_order_queue;   /*  1968    80 */
  /* --- cacheline 32 boundary (2048 bytes) --- */
  struct tcp_sack_block      duplicate_sack[1];    /*  2048     8 */
  struct tcp_sack_block      selective_acks[4];    /*  2056    32 */
  struct tcp_sack_block      recv_sack_cache[4];   /*  2088    32 */
  /* --- cacheline 33 boundary (2112 bytes) was 8 bytes ago --- */
  struct sk_buff *           highest_sack;         /*  2120     8 */
  int                        lost_cnt_hint;        /*  2128     4 */
  int                        retransmit_cnt_hint;  /*  2132     4 */
  u32                        lost_retrans_low;     /*  2136     4 */
  u8                         reordering;           /*  2140     1 */
  u8                         keepalive_probes;     /*  2141     1 */
  /* XXX 2 bytes hole, try to pack */
  u32                        prior_ssthresh;       /*  2144     4 */
  u32                        high_seq;             /*  2148     4 */
  u32                        retrans_stamp;        /*  2152     4 */
  u32                        undo_marker;          /*  2156     4 */
  int                        undo_retrans;         /*  2160     4 */
  u32                        total_retrans;        /*  2164     4 */
...and they're then followed by URG slowpath & keepalive related
variables.
Head of the out_of_order_queue always needed for empty checks, if
that's empty (and TCP is in CA_Open), following ~200 bytes (in 64-bit)
shouldn't be necessary for anything. If only OFO queue exists but TCP
is in CA_Open, selective_acks (and possibly duplicate_sack) are
necessary besides the out_of_order_queue but the rest of the block
again shouldn't be (ie., the other direction had losses).
As the cacheline boundaries depend on many factors in the preceding
stuff, trying to align considering them doesn't make too much sense.
Commented one ordering hazard.
There are a number of low utilized u8/16s that could be combined to get 2
bytes less in total so that the hole could be made to vanish (includes
at least ecn_flags, urg_data, urg_mode, frto_counter, nonagle).
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Acked-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
											
										 
											2008-05-29 03:25:23 -07:00
										 |  |  | 	/* from STCP, retrans queue hinting */ | 
					
						
							|  |  |  | 	struct sk_buff* lost_skb_hint; | 
					
						
							|  |  |  | 	struct sk_buff *scoreboard_skb_hint; | 
					
						
							|  |  |  | 	struct sk_buff *retransmit_skb_hint; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	struct sk_buff_head	out_of_order_queue; /* Out of order segments go here */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-05-07 15:33:04 -04:00
										 |  |  | 	/* SACKs data, these 2 need to be together (see tcp_options_write) */ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */ | 
					
						
							|  |  |  | 	struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-11-15 19:49:47 -08:00
										 |  |  | 	struct tcp_sack_block recv_sack_cache[4]; | 
					
						
							| 
									
										
										
										
											2005-11-10 17:14:59 -08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-02-27 17:52:52 -05:00
										 |  |  | 	struct sk_buff *highest_sack;   /* skb just after the highest
 | 
					
						
							|  |  |  | 					 * skb with SACKed bit set | 
					
						
							| 
									
										
										
										
											2007-11-15 19:41:46 -08:00
										 |  |  | 					 * (validity guaranteed only if | 
					
						
							|  |  |  | 					 * sacked_out > 0) | 
					
						
							|  |  |  | 					 */ | 
					
						
							| 
									
										
										
										
											2007-03-24 21:03:23 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-11-10 17:14:59 -08:00
										 |  |  | 	int     lost_cnt_hint; | 
					
						
							| 
									
										
										
										
											2008-09-20 21:20:20 -07:00
										 |  |  | 	u32     retransmit_high;	/* L-bits may be on up to this seqno */ | 
					
						
							| 
									
										
										
										
											2005-11-10 17:14:59 -08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-10-11 17:36:13 -07:00
										 |  |  | 	u32	lost_retrans_low;	/* Sent seq after any rxmit (lowest) */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-05-21 17:40:05 -07:00
										 |  |  | 	u32	prior_ssthresh; /* ssthresh saved at recovery start	*/ | 
					
						
							| 
									
										
										
										
											2006-11-28 01:12:38 -02:00
										 |  |  | 	u32	high_seq;	/* snd_nxt at onset of congestion	*/ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-11-28 01:12:38 -02:00
										 |  |  | 	u32	retrans_stamp;	/* Timestamp of the last retransmit,
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 				 * also used in SYN-SENT to remember stamp of | 
					
						
							|  |  |  | 				 * the first SYN. */ | 
					
						
							| 
									
										
										
										
											2006-11-28 01:12:38 -02:00
										 |  |  | 	u32	undo_marker;	/* tracking retrans started here. */ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	int	undo_retrans;	/* number of undoable retransmissions. */ | 
					
						
							| 
									
										
											  
											
												tcp: Reorganize tcp_sock to fill 64-bit holes & improve locality
I tried to group recovery related fields nearby (non-CA_Open related
variables, to be more accurate) so that one to three cachelines would
not be necessary in CA_Open. These are now contiguously deployed:
  struct sk_buff_head        out_of_order_queue;   /*  1968    80 */
  /* --- cacheline 32 boundary (2048 bytes) --- */
  struct tcp_sack_block      duplicate_sack[1];    /*  2048     8 */
  struct tcp_sack_block      selective_acks[4];    /*  2056    32 */
  struct tcp_sack_block      recv_sack_cache[4];   /*  2088    32 */
  /* --- cacheline 33 boundary (2112 bytes) was 8 bytes ago --- */
  struct sk_buff *           highest_sack;         /*  2120     8 */
  int                        lost_cnt_hint;        /*  2128     4 */
  int                        retransmit_cnt_hint;  /*  2132     4 */
  u32                        lost_retrans_low;     /*  2136     4 */
  u8                         reordering;           /*  2140     1 */
  u8                         keepalive_probes;     /*  2141     1 */
  /* XXX 2 bytes hole, try to pack */
  u32                        prior_ssthresh;       /*  2144     4 */
  u32                        high_seq;             /*  2148     4 */
  u32                        retrans_stamp;        /*  2152     4 */
  u32                        undo_marker;          /*  2156     4 */
  int                        undo_retrans;         /*  2160     4 */
  u32                        total_retrans;        /*  2164     4 */
...and they're then followed by URG slowpath & keepalive related
variables.
Head of the out_of_order_queue always needed for empty checks, if
that's empty (and TCP is in CA_Open), following ~200 bytes (in 64-bit)
shouldn't be necessary for anything. If only OFO queue exists but TCP
is in CA_Open, selective_acks (and possibly duplicate_sack) are
necessary besides the out_of_order_queue but the rest of the block
again shouldn't be (ie., the other direction had losses).
As the cacheline boundaries depend on many factors in the preceding
stuff, trying to align considering them doesn't make too much sense.
Commented one ordering hazard.
There are a number of low utilized u8/16s that could be combined to get 2
bytes less in total so that the hole could be made to vanish (includes
at least ecn_flags, urg_data, urg_mode, frto_counter, nonagle).
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Acked-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
											
										 
											2008-05-29 03:25:23 -07:00
										 |  |  | 	u32	total_retrans;	/* Total retransmits for entire connection */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-11-28 01:12:38 -02:00
										 |  |  | 	u32	urg_seq;	/* Seq of received urgent pointer */ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	unsigned int		keepalive_time;	  /* time before keep alive takes place */ | 
					
						
							|  |  |  | 	unsigned int		keepalive_intvl;  /* time interval between keep alive probes */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-04-19 09:43:48 +00:00
										 |  |  | 	int			linger2; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | /* Receiver side RTT estimation */ | 
					
						
							|  |  |  | 	struct { | 
					
						
							| 
									
										
										
										
											2006-11-28 01:12:38 -02:00
										 |  |  | 		u32	rtt; | 
					
						
							|  |  |  | 		u32	seq; | 
					
						
							|  |  |  | 		u32	time; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	} rcv_rtt_est; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Receiver queue space */ | 
					
						
							|  |  |  | 	struct { | 
					
						
							|  |  |  | 		int	space; | 
					
						
							| 
									
										
										
										
											2006-11-28 01:12:38 -02:00
										 |  |  | 		u32	seq; | 
					
						
							|  |  |  | 		u32	time; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	} rcvq_space; | 
					
						
							| 
									
										
										
										
											2006-03-20 21:32:58 -08:00
										 |  |  | 
 | 
					
						
							|  |  |  | /* TCP-specific MTU probe information. */ | 
					
						
							|  |  |  | 	struct { | 
					
						
							| 
									
										
										
										
											2006-11-28 01:12:38 -02:00
										 |  |  | 		u32		  probe_seq_start; | 
					
						
							|  |  |  | 		u32		  probe_seq_end; | 
					
						
							| 
									
										
										
										
											2006-03-20 21:32:58 -08:00
										 |  |  | 	} mtu_probe; | 
					
						
							| 
									
										
										
										
											2012-07-23 09:48:52 +02:00
										 |  |  | 	u32	mtu_info; /* We received an ICMP_FRAG_NEEDED / ICMPV6_PKT_TOOBIG
 | 
					
						
							|  |  |  | 			   * while socket was owned by user. | 
					
						
							|  |  |  | 			   */ | 
					
						
							| 
									
										
										
										
											2006-11-14 19:07:45 -08:00
										 |  |  | 
 | 
					
						
							|  |  |  | #ifdef CONFIG_TCP_MD5SIG
 | 
					
						
							|  |  |  | /* TCP AF-Specific parts; only used by MD5 Signature support so far */ | 
					
						
							| 
									
										
										
										
											2009-09-01 19:25:03 +00:00
										 |  |  | 	const struct tcp_sock_af_ops	*af_specific; | 
					
						
							| 
									
										
										
										
											2006-11-14 19:07:45 -08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-09-01 19:25:03 +00:00
										 |  |  | /* TCP MD5 Signature Option information */ | 
					
						
							| 
									
										
										
										
											2012-01-31 18:45:40 +00:00
										 |  |  | 	struct tcp_md5sig_info	__rcu *md5sig_info; | 
					
						
							| 
									
										
										
										
											2006-11-14 19:07:45 -08:00
										 |  |  | #endif
 | 
					
						
							| 
									
										
											  
											
												TCPCT part 1d: define TCP cookie option, extend existing struct's
Data structures are carefully composed to require minimal additions.
For example, the struct tcp_options_received cookie_plus variable fits
between existing 16-bit and 8-bit variables, requiring no additional
space (taking alignment into consideration).  There are no additions to
tcp_request_sock, and only 1 pointer in tcp_sock.
This is a significantly revised implementation of an earlier (year-old)
patch that no longer applies cleanly, with permission of the original
author (Adam Langley):
    http://thread.gmane.org/gmane.linux.network/102586
The principal difference is using a TCP option to carry the cookie nonce,
instead of a user configured offset in the data.  This is more flexible and
less subject to user configuration error.  Such a cookie option has been
suggested for many years, and is also useful without SYN data, allowing
several related concepts to use the same extension option.
    "Re: SYN floods (was: does history repeat itself?)", September 9, 1996.
    http://www.merit.net/mail.archives/nanog/1996-09/msg00235.html
    "Re: what a new TCP header might look like", May 12, 1998.
    ftp://ftp.isi.edu/end2end/end2end-interest-1998.mail
These functions will also be used in subsequent patches that implement
additional features.
Requires:
   TCPCT part 1a: add request_values parameter for sending SYNACK
   TCPCT part 1b: generate Responder Cookie secret
   TCPCT part 1c: sysctl_tcp_cookie_size, socket option TCP_COOKIE_TRANSACTIONS
Signed-off-by: William.Allen.Simpson@gmail.com
Signed-off-by: David S. Miller <davem@davemloft.net>
											
										 
											2009-12-02 18:17:05 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	/* When the cookie options are generated and exchanged, then this
 | 
					
						
							|  |  |  | 	 * object holds a reference to them (cookie_values->kref).  Also | 
					
						
							|  |  |  | 	 * contains related tcp_cookie_transactions fields. | 
					
						
							|  |  |  | 	 */ | 
					
						
							|  |  |  | 	struct tcp_cookie_values  *cookie_values; | 
					
						
							| 
									
										
										
										
											2012-08-31 12:29:11 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | /* TCP fastopen related information */ | 
					
						
							|  |  |  | 	struct tcp_fastopen_request *fastopen_req; | 
					
						
							|  |  |  | 	/* fastopen_rsk points to request_sock that resulted in this big
 | 
					
						
							|  |  |  | 	 * socket. Used to retransmit SYNACKs etc. | 
					
						
							|  |  |  | 	 */ | 
					
						
							|  |  |  | 	struct request_sock *fastopen_rsk; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												tcp: TCP Small Queues
This introduces TSQ (TCP Small Queues)
TSQ goal is to reduce number of TCP packets in xmit queues (qdisc &
device queues), to reduce RTT and cwnd bias, part of the bufferbloat
problem.
sk->sk_wmem_alloc not allowed to grow above a given limit,
allowing no more than ~128KB [1] per tcp socket in qdisc/dev layers at a
given time.
TSO packets are sized/capped to half the limit, so that we have two
TSO packets in flight, allowing better bandwidth use.
As a side effect, setting the limit to 40000 automatically reduces the
standard gso max limit (65536) to 40000/2 : It can help to reduce
latencies of high prio packets, having smaller TSO packets.
This means we divert sock_wfree() to a tcp_wfree() handler, to
queue/send following frames when skb_orphan() [2] is called for the
already queued skbs.
Results on my dev machines (tg3/ixgbe nics) are really impressive,
using standard pfifo_fast, and with or without TSO/GSO.
Without reduction of nominal bandwidth, we have reduction of buffering
per bulk sender :
< 1ms on Gbit (instead of 50ms with TSO)
< 8ms on 100Mbit (instead of 132 ms)
I no longer have 4 MBytes backlogged in qdisc by a single netperf
session, and both side socket autotuning no longer use 4 Mbytes.
As skb destructor cannot restart xmit itself ( as qdisc lock might be
taken at this point ), we delegate the work to a tasklet. We use one
tasklet per cpu for performance reasons.
If tasklet finds a socket owned by the user, it sets TSQ_OWNED flag.
This flag is tested in a new protocol method called from release_sock(),
to eventually send new segments.
[1] New /proc/sys/net/ipv4/tcp_limit_output_bytes tunable
[2] skb_orphan() is usually called at TX completion time,
  but some drivers call it in their start_xmit() handler.
  These drivers should at least use BQL, or else a single TCP
  session can still fill the whole NIC TX ring, since TSQ will
  have no effect.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Dave Taht <dave.taht@bufferbloat.net>
Cc: Tom Herbert <therbert@google.com>
Cc: Matt Mathis <mattmathis@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Nandita Dukkipati <nanditad@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
											
										 
											2012-07-11 05:50:31 +00:00
										 |  |  | enum tsq_flags { | 
					
						
							|  |  |  | 	TSQ_THROTTLED, | 
					
						
							|  |  |  | 	TSQ_QUEUED, | 
					
						
							| 
									
										
										
										
											2012-07-20 05:45:50 +00:00
										 |  |  | 	TCP_TSQ_DEFERRED,	   /* tcp_tasklet_func() found socket was owned */ | 
					
						
							|  |  |  | 	TCP_WRITE_TIMER_DEFERRED,  /* tcp_write_timer() found socket was owned */ | 
					
						
							|  |  |  | 	TCP_DELACK_TIMER_DEFERRED, /* tcp_delack_timer() found socket was owned */ | 
					
						
							| 
									
										
										
										
											2012-07-23 09:48:52 +02:00
										 |  |  | 	TCP_MTU_REDUCED_DEFERRED,  /* tcp_v{4|6}_err() could not call
 | 
					
						
							|  |  |  | 				    * tcp_v{4|6}_mtu_reduced() | 
					
						
							|  |  |  | 				    */ | 
					
						
							| 
									
										
											  
											
												tcp: TCP Small Queues
This introduce TSQ (TCP Small Queues)
TSQ goal is to reduce number of TCP packets in xmit queues (qdisc &
device queues), to reduce RTT and cwnd bias, part of the bufferbloat
problem.
sk->sk_wmem_alloc not allowed to grow above a given limit,
allowing no more than ~128KB [1] per tcp socket in qdisc/dev layers at a
given time.
TSO packets are sized/capped to half the limit, so that we have two
TSO packets in flight, allowing better bandwidth use.
As a side effect, setting the limit to 40000 automatically reduces the
standard gso max limit (65536) to 40000/2 : It can help to reduce
latencies of high prio packets, having smaller TSO packets.
This means we divert sock_wfree() to a tcp_wfree() handler, to
queue/send following frames when skb_orphan() [2] is called for the
already queued skbs.
Results on my dev machines (tg3/ixgbe nics) are really impressive,
using standard pfifo_fast, and with or without TSO/GSO.
Without reduction of nominal bandwidth, we have reduction of buffering
per bulk sender :
< 1ms on Gbit (instead of 50ms with TSO)
< 8ms on 100Mbit (instead of 132 ms)
I no longer have 4 MBytes backlogged in qdisc by a single netperf
session, and both side socket autotuning no longer use 4 Mbytes.
As skb destructor cannot restart xmit itself ( as qdisc lock might be
taken at this point ), we delegate the work to a tasklet. We use one
tasklest per cpu for performance reasons.
If tasklet finds a socket owned by the user, it sets TSQ_OWNED flag.
This flag is tested in a new protocol method called from release_sock(),
to eventually send new segments.
[1] New /proc/sys/net/ipv4/tcp_limit_output_bytes tunable
[2] skb_orphan() is usually called at TX completion time,
  but some drivers call it in their start_xmit() handler.
  These drivers should at least use BQL, or else a single TCP
  session can still fill the whole NIC TX ring, since TSQ will
  have no effect.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Dave Taht <dave.taht@bufferbloat.net>
Cc: Tom Herbert <therbert@google.com>
Cc: Matt Mathis <mattmathis@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Nandita Dukkipati <nanditad@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
											
										 
											2012-07-11 05:50:31 +00:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | static inline struct tcp_sock *tcp_sk(const struct sock *sk) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	return (struct tcp_sock *)sk; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-08-09 20:09:30 -07:00
										 |  |  | struct tcp_timewait_sock { | 
					
						
							|  |  |  | 	struct inet_timewait_sock tw_sk; | 
					
						
							| 
									
										
										
										
											2006-11-28 01:12:38 -02:00
										 |  |  | 	u32			  tw_rcv_nxt; | 
					
						
							|  |  |  | 	u32			  tw_snd_nxt; | 
					
						
							|  |  |  | 	u32			  tw_rcv_wnd; | 
					
						
							| 
									
										
										
										
											2013-02-11 05:50:17 +00:00
										 |  |  | 	u32			  tw_ts_offset; | 
					
						
							| 
									
										
										
										
											2006-11-28 01:12:38 -02:00
										 |  |  | 	u32			  tw_ts_recent; | 
					
						
							| 
									
										
										
										
											2005-08-09 20:09:30 -07:00
										 |  |  | 	long			  tw_ts_recent_stamp; | 
					
						
							| 
									
										
										
										
											2006-11-14 19:07:45 -08:00
										 |  |  | #ifdef CONFIG_TCP_MD5SIG
 | 
					
						
							| 
									
										
										
										
											2012-06-09 14:56:12 -07:00
										 |  |  | 	struct tcp_md5sig_key	  *tw_md5_key; | 
					
						
							| 
									
										
										
										
											2006-11-14 19:07:45 -08:00
										 |  |  | #endif
 | 
					
						
							| 
									
										
											  
											
												TCPCT part 1d: define TCP cookie option, extend existing struct's
Data structures are carefully composed to require minimal additions.
For example, the struct tcp_options_received cookie_plus variable fits
between existing 16-bit and 8-bit variables, requiring no additional
space (taking alignment into consideration).  There are no additions to
tcp_request_sock, and only 1 pointer in tcp_sock.
This is a significantly revised implementation of an earlier (year-old)
patch that no longer applies cleanly, with permission of the original
author (Adam Langley):
    http://thread.gmane.org/gmane.linux.network/102586
The principle difference is using a TCP option to carry the cookie nonce,
instead of a user configured offset in the data.  This is more flexible and
less subject to user configuration error.  Such a cookie option has been
suggested for many years, and is also useful without SYN data, allowing
several related concepts to use the same extension option.
    "Re: SYN floods (was: does history repeat itself?)", September 9, 1996.
    http://www.merit.net/mail.archives/nanog/1996-09/msg00235.html
    "Re: what a new TCP header might look like", May 12, 1998.
    ftp://ftp.isi.edu/end2end/end2end-interest-1998.mail
These functions will also be used in subsequent patches that implement
additional features.
Requires:
   TCPCT part 1a: add request_values parameter for sending SYNACK
   TCPCT part 1b: generate Responder Cookie secret
   TCPCT part 1c: sysctl_tcp_cookie_size, socket option TCP_COOKIE_TRANSACTIONS
Signed-off-by: William.Allen.Simpson@gmail.com
Signed-off-by: David S. Miller <davem@davemloft.net>
											
										 
											2009-12-02 18:17:05 +00:00
										 |  |  | 	/* Few sockets in timewait have cookies; in that case, then this
 | 
					
						
							|  |  |  | 	 * object holds a reference to them (tw_cookie_values->kref). | 
					
						
							|  |  |  | 	 */ | 
					
						
							|  |  |  | 	struct tcp_cookie_values  *tw_cookie_values; | 
					
						
							| 
									
										
										
										
											2005-08-09 20:09:30 -07:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	return (struct tcp_timewait_sock *)sk; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-08-31 12:29:11 +00:00
										 |  |  | static inline bool tcp_passive_fastopen(const struct sock *sk) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	return (sk->sk_state == TCP_SYN_RECV && | 
					
						
							|  |  |  | 		tcp_sk(sk)->fastopen_rsk != NULL); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline bool fastopen_cookie_present(struct tcp_fastopen_cookie *foc) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	return foc->len != -1; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-09-18 14:19:23 +00:00
										 |  |  | extern void tcp_sock_destruct(struct sock *sk); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-08-31 12:29:11 +00:00
										 |  |  | static inline int fastopen_init_queue(struct sock *sk, int backlog) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct request_sock_queue *queue = | 
					
						
							|  |  |  | 	    &inet_csk(sk)->icsk_accept_queue; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if (queue->fastopenq == NULL) { | 
					
						
							|  |  |  | 		queue->fastopenq = kzalloc( | 
					
						
							|  |  |  | 		    sizeof(struct fastopen_queue), | 
					
						
							|  |  |  | 		    sk->sk_allocation); | 
					
						
							|  |  |  | 		if (queue->fastopenq == NULL) | 
					
						
							|  |  |  | 			return -ENOMEM; | 
					
						
							| 
									
										
										
										
											2012-09-18 14:19:23 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 		sk->sk_destruct = tcp_sock_destruct; | 
					
						
							| 
									
										
										
										
											2012-08-31 12:29:11 +00:00
										 |  |  | 		spin_lock_init(&queue->fastopenq->lock); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	queue->fastopenq->max_qlen = backlog; | 
					
						
							|  |  |  | 	return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | #endif	/* _LINUX_TCP_H */
 |