 2d36097d26
			
		
	
	
	2d36097d26
	
	
	
		
			
			This patch adds a queue mapping mode to the fanout operation of af_packet sockets. This allows user space af_packet users to better filter on flows ingressing and egressing via a specific hardware queue, and avoids the potential packet reordering that can occur when FANOUT_CPU is being used and irq affinity varies. Tested successfully by myself. applies to net-next Signed-off-by: Neil Horman <nhorman@tuxdriver.com> CC: "David S. Miller" <davem@davemloft.net> Signed-off-by: David S. Miller <davem@davemloft.net>
		
			
				
	
	
		
			290 lines
		
	
	
	
		
			7.3 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			290 lines
		
	
	
	
		
			7.3 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
| #ifndef __LINUX_IF_PACKET_H
 | |
| #define __LINUX_IF_PACKET_H
 | |
| 
 | |
#include <linux/types.h>
 | |
| 
 | |
/*
 * Packet socket address carrying a device name rather than an index.
 *
 * The layout is part of the user/kernel ABI: field order and types must
 * not change.
 */
struct sockaddr_pkt {
	unsigned short	spkt_family;
	unsigned char	spkt_device[14];	/* 14-byte device field */
	__be16		spkt_protocol;		/* big-endian 16-bit value */
};
 | |
| 
 | |
/*
 * Packet socket address identifying an interface by index.
 *
 * The layout is part of the user/kernel ABI: field order and types must
 * not change.
 */
struct sockaddr_ll {
	unsigned short	sll_family;
	__be16		sll_protocol;	/* big-endian 16-bit value */
	int		sll_ifindex;
	unsigned short	sll_hatype;
	unsigned char	sll_pkttype;	/* presumably one of PACKET_HOST.. - confirm */
	unsigned char	sll_halen;	/* presumably bytes used in sll_addr - confirm */
	unsigned char	sll_addr[8];
};
 | |
| 
 | |
/*
 * Packet types.
 *
 * NOTE(review): PACKET_USER and PACKET_FASTROUTE both expand to 6. The
 * comment below marks PACKET_FASTROUTE as unused, so the overlap looks
 * deliberate - confirm before renumbering anything.
 */

#define PACKET_HOST		0		/* To us		*/
#define PACKET_BROADCAST	1		/* To all		*/
#define PACKET_MULTICAST	2		/* To group		*/
#define PACKET_OTHERHOST	3		/* To someone else	*/
#define PACKET_OUTGOING		4		/* Outgoing of any type */
#define PACKET_LOOPBACK		5		/* MC/BRD frame looped back */
#define PACKET_USER		6		/* To user space	*/
#define PACKET_KERNEL		7		/* To kernel space	*/
/* Unused, PACKET_FASTROUTE and PACKET_LOOPBACK are invisible to user space */
#define PACKET_FASTROUTE	6		/* Fastrouted frame	*/
 | |
| 
 | |
/*
 * Packet socket options.
 *
 * The numbers are ABI; 4 is intentionally skipped (see note below).
 */

#define PACKET_ADD_MEMBERSHIP		1
#define PACKET_DROP_MEMBERSHIP		2
#define PACKET_RECV_OUTPUT		3
/* Value 4 is still used by obsolete turbo-packet. */
#define PACKET_RX_RING			5
#define PACKET_STATISTICS		6
#define PACKET_COPY_THRESH		7
#define PACKET_AUXDATA			8
#define PACKET_ORIGDEV			9
#define PACKET_VERSION			10
#define PACKET_HDRLEN			11
#define PACKET_RESERVE			12
#define PACKET_TX_RING			13
#define PACKET_LOSS			14
#define PACKET_VNET_HDR			15
#define PACKET_TX_TIMESTAMP		16
#define PACKET_TIMESTAMP		17
#define PACKET_FANOUT			18
#define PACKET_TX_HAS_OFF		19
#define PACKET_QDISC_BYPASS		20
 | |
| 
 | |
/* Fanout modes. */
#define PACKET_FANOUT_HASH		0
#define PACKET_FANOUT_LB		1
#define PACKET_FANOUT_CPU		2
#define PACKET_FANOUT_ROLLOVER		3
#define PACKET_FANOUT_RND		4
/*
 * Queue-mapping mode. Per the introducing commit message, it lets
 * af_packet users filter on flows tied to a specific hardware queue and
 * avoids the reordering that can occur with PACKET_FANOUT_CPU when irq
 * affinity varies.
 */
#define PACKET_FANOUT_QM		5
/* Flag bits; they sit above the mode numbers (presumably OR-ed in - confirm). */
#define PACKET_FANOUT_FLAG_ROLLOVER	0x1000
#define PACKET_FANOUT_FLAG_DEFRAG	0x8000
 | |
| 
 | |
/* Two-counter statistics record (ABI layout: two unsigned ints). */
struct tpacket_stats {
	unsigned int	tp_packets;
	unsigned int	tp_drops;
};
 | |
| 
 | |
/*
 * Statistics record with a third counter appended; the first two fields
 * match struct tpacket_stats in name, type and position.
 */
struct tpacket_stats_v3 {
	unsigned int	tp_packets;
	unsigned int	tp_drops;
	unsigned int	tp_freeze_q_cnt;
};
 | |
| 
 | |
| union tpacket_stats_u {
 | |
| 	struct tpacket_stats stats1;
 | |
| 	struct tpacket_stats_v3 stats3;
 | |
| };
 | |
| 
 | |
/*
 * Auxiliary per-packet data record (fixed-width fields, 20 bytes).
 *
 * The TP_STATUS_VLAN_VALID / TP_STATUS_VLAN_TPID_VALID comments in this
 * header state that those status bits signal when tp_vlan_tci and
 * tp_vlan_tpid hold valid values.
 */
struct tpacket_auxdata {
	__u32		tp_status;
	__u32		tp_len;
	__u32		tp_snaplen;
	__u16		tp_mac;
	__u16		tp_net;
	__u16		tp_vlan_tci;
	__u16		tp_vlan_tpid;
};
 | |
| 
 | |
/* Rx ring - header status (TP_STATUS_KERNEL is the all-bits-clear value) */
#define TP_STATUS_KERNEL		      0
#define TP_STATUS_USER			(1 << 0)
#define TP_STATUS_COPY			(1 << 1)
#define TP_STATUS_LOSING		(1 << 2)
#define TP_STATUS_CSUMNOTREADY		(1 << 3)
#define TP_STATUS_VLAN_VALID		(1 << 4) /* auxdata has valid tp_vlan_tci */
#define TP_STATUS_BLK_TMO		(1 << 5)
#define TP_STATUS_VLAN_TPID_VALID	(1 << 6) /* auxdata has valid tp_vlan_tpid */
 | |
| 
 | |
/*
 * Tx ring - header status.
 * These reuse the same low bit positions as the Rx ring status values.
 */
#define TP_STATUS_AVAILABLE	      0
#define TP_STATUS_SEND_REQUEST	(1 << 0)
#define TP_STATUS_SENDING	(1 << 1)
#define TP_STATUS_WRONG_FORMAT	(1 << 2)
 | |
| 
 | |
/*
 * Rx and Tx ring - header status.
 *
 * Bit 31 is written with an unsigned constant: `1 << 31` overflows a
 * 32-bit signed int (undefined behaviour) and in practice produces a
 * negative value that sign-extends when widened or compared.
 */
#define TP_STATUS_TS_SOFTWARE		(1 << 29)
#define TP_STATUS_TS_SYS_HARDWARE	(1 << 30)
#define TP_STATUS_TS_RAW_HARDWARE	(1U << 31)
 | |
| 
 | |
/*
 * Rx ring - feature request bits
 * (presumably set in tpacket_req3.tp_feature_req_word - confirm).
 */
#define TP_FT_REQ_FILL_RXHASH	0x1
 | |
| 
 | |
/*
 * Frame header using native C types.
 *
 * tp_status is an unsigned long, so the offsets of the following fields
 * and the total size differ between 32-bit and 64-bit ABIs. Field order
 * and types are ABI and must not change.
 */
struct tpacket_hdr {
	unsigned long	tp_status;
	unsigned int	tp_len;
	unsigned int	tp_snaplen;
	unsigned short	tp_mac;
	unsigned short	tp_net;
	unsigned int	tp_sec;
	unsigned int	tp_usec;
};
 | |
| 
 | |
/* Alignment unit for ring frames, in bytes (a power of two). */
#define TPACKET_ALIGNMENT	16
/*
 * Round x up to the next multiple of TPACKET_ALIGNMENT.
 * The mask trick relies on TPACKET_ALIGNMENT being a power of two.
 */
#define TPACKET_ALIGN(x)	(((x)+TPACKET_ALIGNMENT-1)&~(TPACKET_ALIGNMENT-1))
/* Aligned size of struct tpacket_hdr plus the struct sockaddr_ll that follows it. */
#define TPACKET_HDRLEN		(TPACKET_ALIGN(sizeof(struct tpacket_hdr)) + sizeof(struct sockaddr_ll))
 | |
| 
 | |
/*
 * Frame header built only from fixed-width types, so the layout (32
 * bytes) is the same on 32-bit and 64-bit ABIs. It carries a nanosecond
 * field (tp_nsec) where struct tpacket_hdr has tp_usec.
 */
struct tpacket2_hdr {
	__u32		tp_status;
	__u32		tp_len;
	__u32		tp_snaplen;
	__u16		tp_mac;
	__u16		tp_net;
	__u32		tp_sec;
	__u32		tp_nsec;
	__u16		tp_vlan_tci;
	__u16		tp_vlan_tpid;
	__u8		tp_padding[4];	/* pads the struct out to 32 bytes */
};
 | |
| 
 | |
/*
 * Variant part embedded in struct tpacket3_hdr (12 bytes).
 * Note tp_vlan_tci is 32 bits wide here, unlike the 16-bit field of the
 * same name in struct tpacket2_hdr and struct tpacket_auxdata.
 */
struct tpacket_hdr_variant1 {
	__u32	tp_rxhash;
	__u32	tp_vlan_tci;
	__u16	tp_vlan_tpid;
	__u16	tp_padding;
};
 | |
| 
 | |
| struct tpacket3_hdr {
 | |
| 	__u32		tp_next_offset;
 | |
| 	__u32		tp_sec;
 | |
| 	__u32		tp_nsec;
 | |
| 	__u32		tp_snaplen;
 | |
| 	__u32		tp_len;
 | |
| 	__u32		tp_status;
 | |
| 	__u16		tp_mac;
 | |
| 	__u16		tp_net;
 | |
| 	/* pkt_hdr variants */
 | |
| 	union {
 | |
| 		struct tpacket_hdr_variant1 hv1;
 | |
| 	};
 | |
| 	__u8		tp_padding[8];
 | |
| };
 | |
| 
 | |
/*
 * Timestamp: a seconds field plus a sub-second field. The anonymous
 * union makes ts_usec and ts_nsec two names for the same storage.
 */
struct tpacket_bd_ts {
	unsigned int ts_sec;
	union {
		unsigned int ts_usec;
		unsigned int ts_nsec;
	};
};
 | |
| 
 | |
| struct tpacket_hdr_v1 {
 | |
| 	__u32	block_status;
 | |
| 	__u32	num_pkts;
 | |
| 	__u32	offset_to_first_pkt;
 | |
| 
 | |
| 	/* Number of valid bytes (including padding)
 | |
| 	 * blk_len <= tp_block_size
 | |
| 	 */
 | |
| 	__u32	blk_len;
 | |
| 
 | |
| 	/*
 | |
| 	 * Quite a few uses of sequence number:
 | |
| 	 * 1. Make sure cache flush etc worked.
 | |
| 	 *    Well, one can argue - why not use the increasing ts below?
 | |
| 	 *    But look at 2. below first.
 | |
| 	 * 2. When you pass around blocks to other user space decoders,
 | |
| 	 *    you can see which blk[s] is[are] outstanding etc.
 | |
| 	 * 3. Validate kernel code.
 | |
| 	 */
 | |
| 	__aligned_u64	seq_num;
 | |
| 
 | |
| 	/*
 | |
| 	 * ts_last_pkt:
 | |
| 	 *
 | |
| 	 * Case 1.	Block has 'N'(N >=1) packets and TMO'd(timed out)
 | |
| 	 *		ts_last_pkt == 'time-stamp of last packet' and NOT the
 | |
| 	 *		time when the timer fired and the block was closed.
 | |
| 	 *		By providing the ts of the last packet we can absolutely
 | |
| 	 *		guarantee that time-stamp wise, the first packet in the
 | |
| 	 *		next block will never precede the last packet of the
 | |
| 	 *		previous block.
 | |
| 	 * Case 2.	Block has zero packets and TMO'd
 | |
| 	 *		ts_last_pkt = time when the timer fired and the block
 | |
| 	 *		was closed.
 | |
| 	 * Case 3.	Block has 'N' packets and NO TMO.
 | |
| 	 *		ts_last_pkt = time-stamp of the last pkt in the block.
 | |
| 	 *
 | |
| 	 * ts_first_pkt:
 | |
| 	 *		Is always the time-stamp when the block was opened.
 | |
| 	 *		Case a)	ZERO packets
 | |
| 	 *			No packets to deal with but atleast you know the
 | |
| 	 *			time-interval of this block.
 | |
| 	 *		Case b) Non-zero packets
 | |
| 	 *			Use the ts of the first packet in the block.
 | |
| 	 *
 | |
| 	 */
 | |
| 	struct tpacket_bd_ts	ts_first_pkt, ts_last_pkt;
 | |
| };
 | |
| 
 | |
| union tpacket_bd_header_u {
 | |
| 	struct tpacket_hdr_v1 bh1;
 | |
| };
 | |
| 
 | |
| struct tpacket_block_desc {
 | |
| 	__u32 version;
 | |
| 	__u32 offset_to_priv;
 | |
| 	union tpacket_bd_header_u hdr;
 | |
| };
 | |
| 
 | |
/*
 * Header lengths for the tpacket2/tpacket3 headers: the
 * TPACKET_ALIGN-rounded header size plus the struct sockaddr_ll that
 * follows it (same construction as TPACKET_HDRLEN).
 */
#define TPACKET2_HDRLEN		(TPACKET_ALIGN(sizeof(struct tpacket2_hdr)) + sizeof(struct sockaddr_ll))
#define TPACKET3_HDRLEN		(TPACKET_ALIGN(sizeof(struct tpacket3_hdr)) + sizeof(struct sockaddr_ll))
 | |
| 
 | |
/*
 * Header format versions. The enumerators take the implicit values
 * 0, 1, 2, so their order must not change.
 */
enum tpacket_versions {
	TPACKET_V1,
	TPACKET_V2,
	TPACKET_V3
};
 | |
| 
 | |
/*
   Frame structure:

   - Start. Frame must be aligned to TPACKET_ALIGNMENT=16
   - struct tpacket_hdr
   - pad to TPACKET_ALIGNMENT=16
   - struct sockaddr_ll
   - Gap, chosen so that packet data (Start+tp_net) aligns to TPACKET_ALIGNMENT=16
   - Start+tp_mac: [ Optional MAC header ]
   - Start+tp_net: Packet data, aligned to TPACKET_ALIGNMENT=16.
   - Pad to align to TPACKET_ALIGNMENT=16
 */
 | |
| 
 | |
/* Ring geometry request: block size/count and frame size/count. */
struct tpacket_req {
	unsigned int	tp_block_size;	/* Minimal size of contiguous block */
	unsigned int	tp_block_nr;	/* Number of blocks */
	unsigned int	tp_frame_size;	/* Size of frame */
	unsigned int	tp_frame_nr;	/* Total number of frames */
};
 | |
| 
 | |
/*
 * Extended ring geometry request. The first four fields match
 * struct tpacket_req in name, type and position; three more follow.
 */
struct tpacket_req3 {
	unsigned int	tp_block_size;	/* Minimal size of contiguous block */
	unsigned int	tp_block_nr;	/* Number of blocks */
	unsigned int	tp_frame_size;	/* Size of frame */
	unsigned int	tp_frame_nr;	/* Total number of frames */
	unsigned int	tp_retire_blk_tov; /* timeout in msecs */
	unsigned int	tp_sizeof_priv; /* offset to private data area */
	unsigned int	tp_feature_req_word;
};
 | |
| 
 | |
| union tpacket_req_u {
 | |
| 	struct tpacket_req	req;
 | |
| 	struct tpacket_req3	req3;
 | |
| };
 | |
| 
 | |
/*
 * Membership request record (presumably the argument for
 * PACKET_ADD_MEMBERSHIP / PACKET_DROP_MEMBERSHIP - confirm).
 * Layout is ABI: 16 bytes, field order and types must not change.
 */
struct packet_mreq {
	int		mr_ifindex;
	unsigned short	mr_type;	/* presumably one of PACKET_MR_* - confirm */
	unsigned short	mr_alen;	/* presumably bytes used in mr_address - confirm */
	unsigned char	mr_address[8];
};
 | |
| 
 | |
/* PACKET_MR_* type codes (numbers are ABI and must not change). */
#define PACKET_MR_MULTICAST	0
#define PACKET_MR_PROMISC	1
#define PACKET_MR_ALLMULTI	2
#define PACKET_MR_UNICAST	3
 | |
| 
 | |
| #endif
 |