| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  * Copyright (c) 2004 Mellanox Technologies Ltd.  All rights reserved. | 
					
						
							|  |  |  |  * Copyright (c) 2004 Infinicon Corporation.  All rights reserved. | 
					
						
							|  |  |  |  * Copyright (c) 2004 Intel Corporation.  All rights reserved. | 
					
						
							|  |  |  |  * Copyright (c) 2004 Topspin Corporation.  All rights reserved. | 
					
						
							|  |  |  |  * Copyright (c) 2004 Voltaire Corporation.  All rights reserved. | 
					
						
							| 
									
										
										
										
											2005-08-10 23:03:10 -07:00
										 |  |  |  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. | 
					
						
							| 
									
										
										
										
											2007-03-04 16:15:11 -08:00
										 |  |  |  * Copyright (c) 2005, 2006, 2007 Cisco Systems.  All rights reserved. | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  |  * | 
					
						
							|  |  |  |  * This software is available to you under a choice of one of two | 
					
						
							|  |  |  |  * licenses.  You may choose to be licensed under the terms of the GNU | 
					
						
							|  |  |  |  * General Public License (GPL) Version 2, available from the file | 
					
						
							|  |  |  |  * COPYING in the main directory of this source tree, or the | 
					
						
							|  |  |  |  * OpenIB.org BSD license below: | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  *     Redistribution and use in source and binary forms, with or | 
					
						
							|  |  |  |  *     without modification, are permitted provided that the following | 
					
						
							|  |  |  |  *     conditions are met: | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  *      - Redistributions of source code must retain the above | 
					
						
							|  |  |  |  *        copyright notice, this list of conditions and the following | 
					
						
							|  |  |  |  *        disclaimer. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  *      - Redistributions in binary form must reproduce the above | 
					
						
							|  |  |  |  *        copyright notice, this list of conditions and the following | 
					
						
							|  |  |  |  *        disclaimer in the documentation and/or other materials | 
					
						
							|  |  |  |  *        provided with the distribution. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | 
					
						
							|  |  |  |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | 
					
						
							|  |  |  |  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | 
					
						
							|  |  |  |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | 
					
						
							|  |  |  |  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | 
					
						
							|  |  |  |  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | 
					
						
							|  |  |  |  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | 
					
						
							|  |  |  |  * SOFTWARE. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #if !defined(IB_VERBS_H)
 | 
					
						
							|  |  |  | #define IB_VERBS_H
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include <linux/types.h>
 | 
					
						
							|  |  |  | #include <linux/device.h>
 | 
					
						
							| 
									
										
										
										
											2006-12-12 14:27:41 -08:00
										 |  |  | #include <linux/mm.h>
 | 
					
						
							|  |  |  | #include <linux/dma-mapping.h>
 | 
					
						
							| 
									
										
										
										
											2007-02-04 14:11:55 -08:00
										 |  |  | #include <linux/kref.h>
 | 
					
						
							| 
									
										
										
										
											2007-07-31 16:49:15 +03:00
										 |  |  | #include <linux/list.h>
 | 
					
						
							|  |  |  | #include <linux/rwsem.h>
 | 
					
						
							| 
									
										
										
										
											2007-10-30 10:35:04 +01:00
										 |  |  | #include <linux/scatterlist.h>
 | 
					
						
							| 
									
										
										
										
											2010-10-19 15:24:36 +00:00
										 |  |  | #include <linux/workqueue.h>
 | 
					
						
							| 
									
										
											  
											
												IB/core: Ethernet L2 attributes in verbs/cm structures
This patch add the support for Ethernet L2 attributes in the
verbs/cm/cma structures.
When dealing with L2 Ethernet, we should use smac, dmac, vlan ID and priority
in a similar manner that the IB L2 (and the L4 PKEY) attributes are used.
Thus, those attributes were added to the following structures:
* ib_ah_attr - added dmac
* ib_qp_attr - added smac and vlan_id, (sl remains vlan priority)
* ib_wc - added smac, vlan_id
* ib_sa_path_rec - added smac, dmac, vlan_id
* cm_av - added smac and vlan_id
For the path record structure, extra care was taken to avoid the new
fields when packing it into wire format, so we don't break the IB CM
and SA wire protocol.
On the active side, the CM fills its internal structures from the
path provided by the ULP.  We add there taking the ETH L2 attributes
and placing them into the CM Address Handle (struct cm_av).
On the passive side, the CM fills its internal structures from the WC
associated with the REQ message.  We add there taking the ETH L2
attributes from the WC.
When the HW driver provides the required ETH L2 attributes in the WC,
they set the IB_WC_WITH_SMAC and IB_WC_WITH_VLAN flags. The IB core
code checks for the presence of these flags, and in their absence does
address resolution from the ib_init_ah_from_wc() helper function.
ib_modify_qp_is_ok is also updated to consider the link layer. Some
parameters are mandatory for Ethernet link layer, while they are
irrelevant for IB.  Vendor drivers are modified to support the new
function signature.
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
											
										 
											2013-12-12 18:03:11 +02:00
										 |  |  | #include <uapi/linux/if_ether.h>
 | 
					
						
							| 
									
										
										
										
											2005-07-07 17:57:10 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-07-26 16:09:06 -07:00
										 |  |  | #include <linux/atomic.h>
 | 
					
						
							| 
									
										
										
										
											2005-07-07 17:57:10 -07:00
										 |  |  | #include <asm/uaccess.h>
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-10-19 15:24:36 +00:00
										 |  |  | extern struct workqueue_struct *ib_wq; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | union ib_gid { | 
					
						
							|  |  |  | 	u8	raw[16]; | 
					
						
							|  |  |  | 	struct { | 
					
						
							| 
									
										
										
										
											2005-08-13 21:05:57 -07:00
										 |  |  | 		__be64	subnet_prefix; | 
					
						
							|  |  |  | 		__be64	interface_id; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	} global; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-08-03 16:02:42 -05:00
										 |  |  | enum rdma_node_type { | 
					
						
							|  |  |  | 	/* IB values map to NodeInfo:NodeType. */ | 
					
						
							|  |  |  | 	RDMA_NODE_IB_CA 	= 1, | 
					
						
							|  |  |  | 	RDMA_NODE_IB_SWITCH, | 
					
						
							|  |  |  | 	RDMA_NODE_IB_ROUTER, | 
					
						
							| 
									
										
										
										
											2013-09-10 03:36:59 +00:00
										 |  |  | 	RDMA_NODE_RNIC, | 
					
						
							|  |  |  | 	RDMA_NODE_USNIC, | 
					
						
							| 
									
										
										
										
											2014-01-15 17:02:36 -08:00
										 |  |  | 	RDMA_NODE_USNIC_UDP, | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-08-03 16:02:42 -05:00
										 |  |  | enum rdma_transport_type { | 
					
						
							|  |  |  | 	RDMA_TRANSPORT_IB, | 
					
						
							| 
									
										
										
										
											2013-09-10 03:36:59 +00:00
										 |  |  | 	RDMA_TRANSPORT_IWARP, | 
					
						
							| 
									
										
										
										
											2014-01-09 14:48:19 -08:00
										 |  |  | 	RDMA_TRANSPORT_USNIC, | 
					
						
							|  |  |  | 	RDMA_TRANSPORT_USNIC_UDP | 
					
						
							| 
									
										
										
										
											2006-08-03 16:02:42 -05:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | enum rdma_transport_type | 
					
						
							|  |  |  | rdma_node_get_transport(enum rdma_node_type node_type) __attribute_const__; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-09-27 17:51:10 -07:00
										 |  |  | enum rdma_link_layer { | 
					
						
							|  |  |  | 	IB_LINK_LAYER_UNSPECIFIED, | 
					
						
							|  |  |  | 	IB_LINK_LAYER_INFINIBAND, | 
					
						
							|  |  |  | 	IB_LINK_LAYER_ETHERNET, | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | enum ib_device_cap_flags { | 
					
						
							|  |  |  | 	IB_DEVICE_RESIZE_MAX_WR		= 1, | 
					
						
							|  |  |  | 	IB_DEVICE_BAD_PKEY_CNTR		= (1<<1), | 
					
						
							|  |  |  | 	IB_DEVICE_BAD_QKEY_CNTR		= (1<<2), | 
					
						
							|  |  |  | 	IB_DEVICE_RAW_MULTI		= (1<<3), | 
					
						
							|  |  |  | 	IB_DEVICE_AUTO_PATH_MIG		= (1<<4), | 
					
						
							|  |  |  | 	IB_DEVICE_CHANGE_PHY_PORT	= (1<<5), | 
					
						
							|  |  |  | 	IB_DEVICE_UD_AV_PORT_ENFORCE	= (1<<6), | 
					
						
							|  |  |  | 	IB_DEVICE_CURR_QP_STATE_MOD	= (1<<7), | 
					
						
							|  |  |  | 	IB_DEVICE_SHUTDOWN_PORT		= (1<<8), | 
					
						
							|  |  |  | 	IB_DEVICE_INIT_TYPE		= (1<<9), | 
					
						
							|  |  |  | 	IB_DEVICE_PORT_ACTIVE_EVENT	= (1<<10), | 
					
						
							|  |  |  | 	IB_DEVICE_SYS_IMAGE_GUID	= (1<<11), | 
					
						
							|  |  |  | 	IB_DEVICE_RC_RNR_NAK_GEN	= (1<<12), | 
					
						
							|  |  |  | 	IB_DEVICE_SRQ_RESIZE		= (1<<13), | 
					
						
							|  |  |  | 	IB_DEVICE_N_NOTIFY_CQ		= (1<<14), | 
					
						
							| 
									
										
										
										
											2008-07-14 23:48:53 -07:00
										 |  |  | 	IB_DEVICE_LOCAL_DMA_LKEY	= (1<<15), | 
					
						
							| 
									
										
										
										
											2008-04-16 21:09:32 -07:00
										 |  |  | 	IB_DEVICE_RESERVED		= (1<<16), /* old SEND_W_INV */ | 
					
						
							| 
									
										
										
										
											2008-01-30 18:30:57 +02:00
										 |  |  | 	IB_DEVICE_MEM_WINDOW		= (1<<17), | 
					
						
							|  |  |  | 	/*
 | 
					
						
							|  |  |  | 	 * Devices should set IB_DEVICE_UD_IP_SUM if they support | 
					
						
							|  |  |  | 	 * insertion of UDP and TCP checksum on outgoing UD IPoIB | 
					
						
							|  |  |  | 	 * messages and can verify the validity of checksum for | 
					
						
							|  |  |  | 	 * incoming messages.  Setting this flag implies that the | 
					
						
							|  |  |  | 	 * IPoIB driver may set NETIF_F_IP_CSUM for datagram mode. | 
					
						
							|  |  |  | 	 */ | 
					
						
							|  |  |  | 	IB_DEVICE_UD_IP_CSUM		= (1<<18), | 
					
						
							| 
									
										
										
										
											2008-04-16 21:09:27 -07:00
										 |  |  | 	IB_DEVICE_UD_TSO		= (1<<19), | 
					
						
							| 
									
										
										
										
											2011-05-23 17:52:46 -07:00
										 |  |  | 	IB_DEVICE_XRC			= (1<<20), | 
					
						
							| 
									
										
										
										
											2008-07-14 23:48:45 -07:00
										 |  |  | 	IB_DEVICE_MEM_MGT_EXTENSIONS	= (1<<21), | 
					
						
							| 
									
										
										
										
											2008-07-14 23:48:48 -07:00
										 |  |  | 	IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22), | 
					
						
							| 
									
										
										
										
											2013-02-06 16:19:12 +00:00
										 |  |  | 	IB_DEVICE_MEM_WINDOW_TYPE_2A	= (1<<23), | 
					
						
							| 
									
										
											  
											
												IB/core: Add receive flow steering support
The RDMA stack allows for applications to create IB_QPT_RAW_PACKET
QPs, which receive plain Ethernet packets, specifically packets that
don't carry any QPN to be matched by the receiving side.  Applications
using these QPs must be provided with a method to program some
steering rule with the HW so packets arriving at the local port can be
routed to them.
This patch adds ib_create_flow(), which allow providing a flow
specification for a QP.  When there's a match between the
specification and a received packet, the packet is forwarded to that
QP, in a the same way one uses ib_attach_multicast() for IB UD
multicast handling.
Flow specifications are provided as instances of struct ib_flow_spec_yyy,
which describe L2, L3 and L4 headers.  Currently specs for Ethernet, IPv4,
TCP and UDP are defined.  Flow specs are made of values and masks.
The input to ib_create_flow() is a struct ib_flow_attr, which contains
a few mandatory control elements and optional flow specs.
    struct ib_flow_attr {
            enum ib_flow_attr_type type;
            u16      size;
            u16      priority;
            u32      flags;
            u8       num_of_specs;
            u8       port;
            /* Following are the optional layers according to user request
             * struct ib_flow_spec_yyy
             * struct ib_flow_spec_zzz
             */
    };
As these specs are eventually coming from user space, they are defined and
used in a way which allows adding new spec types without kernel/user ABI
change, just with a little API enhancement which defines the newly added spec.
The flow spec structures are defined with TLV (Type-Length-Value)
entries, which allows calling ib_create_flow() with a list of variable
length of optional specs.
For the actual processing of ib_flow_attr the driver uses the number
of specs and the size mandatory fields along with the TLV nature of
the specs.
Steering rules processing order is according to the domain over which
the rule is set and the rule priority.  All rules set by user space
applicatations fall into the IB_FLOW_DOMAIN_USER domain, other domains
could be used by future IPoIB RFS and Ethetool flow-steering interface
implementation.  Lower numerical value for the priority field means
higher priority.
The returned value from ib_create_flow() is a struct ib_flow, which
contains a database pointer (handle) provided by the HW driver to be
used when calling ib_destroy_flow().
Applications that offload TCP/IP traffic can also be written over IB
UD QPs.  The ib_create_flow() / ib_destroy_flow() API is designed to
support UD QPs too.  A HW driver can set IB_DEVICE_MANAGED_FLOW_STEERING
to denote support for flow steering.
The ib_flow_attr enum type supports usage of flow steering for promiscuous
and sniffer purposes:
    IB_FLOW_ATTR_NORMAL - "regular" rule, steering according to rule specification
    IB_FLOW_ATTR_ALL_DEFAULT - default unicast and multicast rule, receive
        all Ethernet traffic which isn't steered to any QP
    IB_FLOW_ATTR_MC_DEFAULT - same as IB_FLOW_ATTR_ALL_DEFAULT but only for multicast
    IB_FLOW_ATTR_SNIFFER - sniffer rule, receive all port traffic
ALL_DEFAULT and MC_DEFAULT rules options are valid only for Ethernet link type.
Signed-off-by: Hadar Hen Zion <hadarh@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
											
										 
											2013-08-07 14:01:59 +03:00
										 |  |  | 	IB_DEVICE_MEM_WINDOW_TYPE_2B	= (1<<24), | 
					
						
							|  |  |  | 	IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29) | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | enum ib_atomic_cap { | 
					
						
							|  |  |  | 	IB_ATOMIC_NONE, | 
					
						
							|  |  |  | 	IB_ATOMIC_HCA, | 
					
						
							|  |  |  | 	IB_ATOMIC_GLOB | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct ib_device_attr { | 
					
						
							|  |  |  | 	u64			fw_ver; | 
					
						
							| 
									
										
										
										
											2005-08-13 21:05:57 -07:00
										 |  |  | 	__be64			sys_image_guid; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	u64			max_mr_size; | 
					
						
							|  |  |  | 	u64			page_size_cap; | 
					
						
							|  |  |  | 	u32			vendor_id; | 
					
						
							|  |  |  | 	u32			vendor_part_id; | 
					
						
							|  |  |  | 	u32			hw_ver; | 
					
						
							|  |  |  | 	int			max_qp; | 
					
						
							|  |  |  | 	int			max_qp_wr; | 
					
						
							|  |  |  | 	int			device_cap_flags; | 
					
						
							|  |  |  | 	int			max_sge; | 
					
						
							|  |  |  | 	int			max_sge_rd; | 
					
						
							|  |  |  | 	int			max_cq; | 
					
						
							|  |  |  | 	int			max_cqe; | 
					
						
							|  |  |  | 	int			max_mr; | 
					
						
							|  |  |  | 	int			max_pd; | 
					
						
							|  |  |  | 	int			max_qp_rd_atom; | 
					
						
							|  |  |  | 	int			max_ee_rd_atom; | 
					
						
							|  |  |  | 	int			max_res_rd_atom; | 
					
						
							|  |  |  | 	int			max_qp_init_rd_atom; | 
					
						
							|  |  |  | 	int			max_ee_init_rd_atom; | 
					
						
							|  |  |  | 	enum ib_atomic_cap	atomic_cap; | 
					
						
							| 
									
										
											  
											
												IB/core: Add support for masked atomic operations
 - Add new IB_WR_MASKED_ATOMIC_CMP_AND_SWP and IB_WR_MASKED_ATOMIC_FETCH_AND_ADD
   send opcodes that can be used to post "masked atomic compare and
   swap" and "masked atomic fetch and add" work request respectively.
 - Add masked_atomic_cap capability.
 - Add mask fields to atomic struct of ib_send_wr
 - Add new opcodes to ib_wc_opcode
The new operations are described more precisely below:
* Masked Compare and Swap (MskCmpSwap)
The MskCmpSwap atomic operation is an extension to the CmpSwap
operation defined in the IB spec.  MskCmpSwap allows the user to
select a portion of the 64 bit target data for the “compare” check as
well as to restrict the swap to a (possibly different) portion.  The
pseudo code below describes the operation:
| atomic_response = *va
| if (!((compare_add ^ *va) & compare_add_mask)) then
|     *va = (*va & ~(swap_mask)) | (swap & swap_mask)
|
| return atomic_response
The additional operands are carried in the Extended Transport Header.
Atomic response generation and packet format for MskCmpSwap is as for
standard IB Atomic operations.
* Masked Fetch and Add (MFetchAdd)
The MFetchAdd Atomic operation extends the functionality of the
standard IB FetchAdd by allowing the user to split the target into
multiple fields of selectable length. The atomic add is done
independently on each one of this fields. A bit set in the
field_boundary parameter specifies the field boundaries. The pseudo
code below describes the operation:
| bit_adder(ci, b1, b2, *co)
| {
|	value = ci + b1 + b2
|	*co = !!(value & 2)
|
|	return value & 1
| }
|
| #define MASK_IS_SET(mask, attr)      (!!((mask)&(attr)))
| bit_position = 1
| carry = 0
| atomic_response = 0
|
| for i = 0 to 63
| {
|         if ( i != 0 )
|                 bit_position =  bit_position << 1
|
|         bit_add_res = bit_adder(carry, MASK_IS_SET(*va, bit_position),
|                                 MASK_IS_SET(compare_add, bit_position), &new_carry)
|         if (bit_add_res)
|                 atomic_response |= bit_position
|
|         carry = ((new_carry) && (!MASK_IS_SET(compare_add_mask, bit_position)))
| }
|
| return atomic_response
Signed-off-by: Vladimir Sokolovsky <vlad@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
											
										 
											2010-04-14 17:23:01 +03:00
										 |  |  | 	enum ib_atomic_cap	masked_atomic_cap; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	int			max_ee; | 
					
						
							|  |  |  | 	int			max_rdd; | 
					
						
							|  |  |  | 	int			max_mw; | 
					
						
							|  |  |  | 	int			max_raw_ipv6_qp; | 
					
						
							|  |  |  | 	int			max_raw_ethy_qp; | 
					
						
							|  |  |  | 	int			max_mcast_grp; | 
					
						
							|  |  |  | 	int			max_mcast_qp_attach; | 
					
						
							|  |  |  | 	int			max_total_mcast_qp_attach; | 
					
						
							|  |  |  | 	int			max_ah; | 
					
						
							|  |  |  | 	int			max_fmr; | 
					
						
							|  |  |  | 	int			max_map_per_fmr; | 
					
						
							|  |  |  | 	int			max_srq; | 
					
						
							|  |  |  | 	int			max_srq_wr; | 
					
						
							|  |  |  | 	int			max_srq_sge; | 
					
						
							| 
									
										
										
										
											2008-07-14 23:48:45 -07:00
										 |  |  | 	unsigned int		max_fast_reg_page_list_len; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	u16			max_pkeys; | 
					
						
							|  |  |  | 	u8			local_ca_ack_delay; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | enum ib_mtu { | 
					
						
							|  |  |  | 	IB_MTU_256  = 1, | 
					
						
							|  |  |  | 	IB_MTU_512  = 2, | 
					
						
							|  |  |  | 	IB_MTU_1024 = 3, | 
					
						
							|  |  |  | 	IB_MTU_2048 = 4, | 
					
						
							|  |  |  | 	IB_MTU_4096 = 5 | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline int ib_mtu_enum_to_int(enum ib_mtu mtu) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	switch (mtu) { | 
					
						
							|  |  |  | 	case IB_MTU_256:  return  256; | 
					
						
							|  |  |  | 	case IB_MTU_512:  return  512; | 
					
						
							|  |  |  | 	case IB_MTU_1024: return 1024; | 
					
						
							|  |  |  | 	case IB_MTU_2048: return 2048; | 
					
						
							|  |  |  | 	case IB_MTU_4096: return 4096; | 
					
						
							|  |  |  | 	default: 	  return -1; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | enum ib_port_state { | 
					
						
							|  |  |  | 	IB_PORT_NOP		= 0, | 
					
						
							|  |  |  | 	IB_PORT_DOWN		= 1, | 
					
						
							|  |  |  | 	IB_PORT_INIT		= 2, | 
					
						
							|  |  |  | 	IB_PORT_ARMED		= 3, | 
					
						
							|  |  |  | 	IB_PORT_ACTIVE		= 4, | 
					
						
							|  |  |  | 	IB_PORT_ACTIVE_DEFER	= 5 | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | enum ib_port_cap_flags { | 
					
						
							|  |  |  | 	IB_PORT_SM				= 1 <<  1, | 
					
						
							|  |  |  | 	IB_PORT_NOTICE_SUP			= 1 <<  2, | 
					
						
							|  |  |  | 	IB_PORT_TRAP_SUP			= 1 <<  3, | 
					
						
							|  |  |  | 	IB_PORT_OPT_IPD_SUP                     = 1 <<  4, | 
					
						
							|  |  |  | 	IB_PORT_AUTO_MIGR_SUP			= 1 <<  5, | 
					
						
							|  |  |  | 	IB_PORT_SL_MAP_SUP			= 1 <<  6, | 
					
						
							|  |  |  | 	IB_PORT_MKEY_NVRAM			= 1 <<  7, | 
					
						
							|  |  |  | 	IB_PORT_PKEY_NVRAM			= 1 <<  8, | 
					
						
							|  |  |  | 	IB_PORT_LED_INFO_SUP			= 1 <<  9, | 
					
						
							|  |  |  | 	IB_PORT_SM_DISABLED			= 1 << 10, | 
					
						
							|  |  |  | 	IB_PORT_SYS_IMAGE_GUID_SUP		= 1 << 11, | 
					
						
							|  |  |  | 	IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP	= 1 << 12, | 
					
						
							| 
									
										
										
										
											2011-10-05 14:21:47 +03:00
										 |  |  | 	IB_PORT_EXTENDED_SPEEDS_SUP             = 1 << 14, | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	IB_PORT_CM_SUP				= 1 << 16, | 
					
						
							|  |  |  | 	IB_PORT_SNMP_TUNNEL_SUP			= 1 << 17, | 
					
						
							|  |  |  | 	IB_PORT_REINIT_SUP			= 1 << 18, | 
					
						
							|  |  |  | 	IB_PORT_DEVICE_MGMT_SUP			= 1 << 19, | 
					
						
							|  |  |  | 	IB_PORT_VENDOR_CLASS_SUP		= 1 << 20, | 
					
						
							|  |  |  | 	IB_PORT_DR_NOTICE_SUP			= 1 << 21, | 
					
						
							|  |  |  | 	IB_PORT_CAP_MASK_NOTICE_SUP		= 1 << 22, | 
					
						
							|  |  |  | 	IB_PORT_BOOT_MGMT_SUP			= 1 << 23, | 
					
						
							|  |  |  | 	IB_PORT_LINK_LATENCY_SUP		= 1 << 24, | 
					
						
							| 
									
										
										
										
											2014-02-09 11:54:34 +02:00
										 |  |  | 	IB_PORT_CLIENT_REG_SUP			= 1 << 25, | 
					
						
							|  |  |  | 	IB_PORT_IP_BASED_GIDS			= 1 << 26 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | enum ib_port_width { | 
					
						
							|  |  |  | 	IB_WIDTH_1X	= 1, | 
					
						
							|  |  |  | 	IB_WIDTH_4X	= 2, | 
					
						
							|  |  |  | 	IB_WIDTH_8X	= 4, | 
					
						
							|  |  |  | 	IB_WIDTH_12X	= 8 | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline int ib_width_enum_to_int(enum ib_port_width width) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	switch (width) { | 
					
						
							|  |  |  | 	case IB_WIDTH_1X:  return  1; | 
					
						
							|  |  |  | 	case IB_WIDTH_4X:  return  4; | 
					
						
							|  |  |  | 	case IB_WIDTH_8X:  return  8; | 
					
						
							|  |  |  | 	case IB_WIDTH_12X: return 12; | 
					
						
							|  |  |  | 	default: 	  return -1; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-02-28 18:49:50 +02:00
										 |  |  | enum ib_port_speed { | 
					
						
							|  |  |  | 	IB_SPEED_SDR	= 1, | 
					
						
							|  |  |  | 	IB_SPEED_DDR	= 2, | 
					
						
							|  |  |  | 	IB_SPEED_QDR	= 4, | 
					
						
							|  |  |  | 	IB_SPEED_FDR10	= 8, | 
					
						
							|  |  |  | 	IB_SPEED_FDR	= 16, | 
					
						
							|  |  |  | 	IB_SPEED_EDR	= 32 | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-07-14 23:48:48 -07:00
										 |  |  | struct ib_protocol_stats { | 
					
						
							|  |  |  | 	/* TBD... */ | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct iw_protocol_stats { | 
					
						
							|  |  |  | 	u64	ipInReceives; | 
					
						
							|  |  |  | 	u64	ipInHdrErrors; | 
					
						
							|  |  |  | 	u64	ipInTooBigErrors; | 
					
						
							|  |  |  | 	u64	ipInNoRoutes; | 
					
						
							|  |  |  | 	u64	ipInAddrErrors; | 
					
						
							|  |  |  | 	u64	ipInUnknownProtos; | 
					
						
							|  |  |  | 	u64	ipInTruncatedPkts; | 
					
						
							|  |  |  | 	u64	ipInDiscards; | 
					
						
							|  |  |  | 	u64	ipInDelivers; | 
					
						
							|  |  |  | 	u64	ipOutForwDatagrams; | 
					
						
							|  |  |  | 	u64	ipOutRequests; | 
					
						
							|  |  |  | 	u64	ipOutDiscards; | 
					
						
							|  |  |  | 	u64	ipOutNoRoutes; | 
					
						
							|  |  |  | 	u64	ipReasmTimeout; | 
					
						
							|  |  |  | 	u64	ipReasmReqds; | 
					
						
							|  |  |  | 	u64	ipReasmOKs; | 
					
						
							|  |  |  | 	u64	ipReasmFails; | 
					
						
							|  |  |  | 	u64	ipFragOKs; | 
					
						
							|  |  |  | 	u64	ipFragFails; | 
					
						
							|  |  |  | 	u64	ipFragCreates; | 
					
						
							|  |  |  | 	u64	ipInMcastPkts; | 
					
						
							|  |  |  | 	u64	ipOutMcastPkts; | 
					
						
							|  |  |  | 	u64	ipInBcastPkts; | 
					
						
							|  |  |  | 	u64	ipOutBcastPkts; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	u64	tcpRtoAlgorithm; | 
					
						
							|  |  |  | 	u64	tcpRtoMin; | 
					
						
							|  |  |  | 	u64	tcpRtoMax; | 
					
						
							|  |  |  | 	u64	tcpMaxConn; | 
					
						
							|  |  |  | 	u64	tcpActiveOpens; | 
					
						
							|  |  |  | 	u64	tcpPassiveOpens; | 
					
						
							|  |  |  | 	u64	tcpAttemptFails; | 
					
						
							|  |  |  | 	u64	tcpEstabResets; | 
					
						
							|  |  |  | 	u64	tcpCurrEstab; | 
					
						
							|  |  |  | 	u64	tcpInSegs; | 
					
						
							|  |  |  | 	u64	tcpOutSegs; | 
					
						
							|  |  |  | 	u64	tcpRetransSegs; | 
					
						
							|  |  |  | 	u64	tcpInErrs; | 
					
						
							|  |  |  | 	u64	tcpOutRsts; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | union rdma_protocol_stats { | 
					
						
							|  |  |  | 	struct ib_protocol_stats	ib; | 
					
						
							|  |  |  | 	struct iw_protocol_stats	iw; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | struct ib_port_attr { | 
					
						
							|  |  |  | 	enum ib_port_state	state; | 
					
						
							|  |  |  | 	enum ib_mtu		max_mtu; | 
					
						
							|  |  |  | 	enum ib_mtu		active_mtu; | 
					
						
							|  |  |  | 	int			gid_tbl_len; | 
					
						
							|  |  |  | 	u32			port_cap_flags; | 
					
						
							|  |  |  | 	u32			max_msg_sz; | 
					
						
							|  |  |  | 	u32			bad_pkey_cntr; | 
					
						
							|  |  |  | 	u32			qkey_viol_cntr; | 
					
						
							|  |  |  | 	u16			pkey_tbl_len; | 
					
						
							|  |  |  | 	u16			lid; | 
					
						
							|  |  |  | 	u16			sm_lid; | 
					
						
							|  |  |  | 	u8			lmc; | 
					
						
							|  |  |  | 	u8			max_vl_num; | 
					
						
							|  |  |  | 	u8			sm_sl; | 
					
						
							|  |  |  | 	u8			subnet_timeout; | 
					
						
							|  |  |  | 	u8			init_type_reply; | 
					
						
							|  |  |  | 	u8			active_width; | 
					
						
							|  |  |  | 	u8			active_speed; | 
					
						
							|  |  |  | 	u8                      phys_state; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							/*
							 * Bit flags naming the device attributes that can be modified.  Each
							 * flag has a same-named member in struct ib_device_modify
							 * (sys_image_guid, node_desc).
							 */
							enum ib_device_modify_flags {
								IB_DEVICE_MODIFY_SYS_IMAGE_GUID	= 1 << 0,
								IB_DEVICE_MODIFY_NODE_DESC	= 1 << 1
							};
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct ib_device_modify { | 
					
						
							|  |  |  | 	u64	sys_image_guid; | 
					
						
							| 
									
										
										
										
											2006-02-02 09:47:14 -08:00
										 |  |  | 	char	node_desc[64]; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							/*
							 * Bit flags for port modification requests.  Note that bit 1 is not
							 * assigned: the values are 1, 1 << 2 and 1 << 3.
							 */
							enum ib_port_modify_flags {
								IB_PORT_SHUTDOWN		= 1,
								IB_PORT_INIT_TYPE		= (1<<2),
								IB_PORT_RESET_QKEY_CNTR		= (1<<3)
							};
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct ib_port_modify { | 
					
						
							|  |  |  | 	u32	set_port_cap_mask; | 
					
						
							|  |  |  | 	u32	clr_port_cap_mask; | 
					
						
							|  |  |  | 	u8	init_type; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							/*
							 * Asynchronous event codes.  Values are assigned sequentially from 0,
							 * so the order of the enumerators must not change.
							 */
							enum ib_event_type {
								IB_EVENT_CQ_ERR,
								IB_EVENT_QP_FATAL,
								IB_EVENT_QP_REQ_ERR,
								IB_EVENT_QP_ACCESS_ERR,
								IB_EVENT_COMM_EST,
								IB_EVENT_SQ_DRAINED,
								IB_EVENT_PATH_MIG,
								IB_EVENT_PATH_MIG_ERR,
								IB_EVENT_DEVICE_FATAL,
								IB_EVENT_PORT_ACTIVE,
								IB_EVENT_PORT_ERR,
								IB_EVENT_LID_CHANGE,
								IB_EVENT_PKEY_CHANGE,
								IB_EVENT_SM_CHANGE,
								IB_EVENT_SRQ_ERR,
								IB_EVENT_SRQ_LIMIT_REACHED,
								IB_EVENT_QP_LAST_WQE_REACHED,
								IB_EVENT_CLIENT_REREGISTER,
								IB_EVENT_GID_CHANGE,
							};
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct ib_event { | 
					
						
							|  |  |  | 	struct ib_device	*device; | 
					
						
							|  |  |  | 	union { | 
					
						
							|  |  |  | 		struct ib_cq	*cq; | 
					
						
							|  |  |  | 		struct ib_qp	*qp; | 
					
						
							| 
									
										
										
										
											2005-08-18 12:23:08 -07:00
										 |  |  | 		struct ib_srq	*srq; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 		u8		port_num; | 
					
						
							|  |  |  | 	} element; | 
					
						
							|  |  |  | 	enum ib_event_type	event; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct ib_event_handler { | 
					
						
							|  |  |  | 	struct ib_device *device; | 
					
						
							|  |  |  | 	void            (*handler)(struct ib_event_handler *, struct ib_event *); | 
					
						
							|  |  |  | 	struct list_head  list; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							/*
							 * INIT_IB_EVENT_HANDLER - initialize a struct ib_event_handler.
							 * @_ptr:     handler structure to fill in
							 * @_device:  value stored in the ->device member
							 * @_handler: callback stored in the ->handler member
							 *
							 * Also initializes the embedded list head.  @_ptr is expanded three
							 * times, so pass an expression without side effects.
							 */
							#define INIT_IB_EVENT_HANDLER(_ptr, _device, _handler)		\
								do {							\
									(_ptr)->device  = (_device);			\
									(_ptr)->handler = (_handler);			\
									INIT_LIST_HEAD(&(_ptr)->list);			\
								} while (0)
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct ib_global_route { | 
					
						
							|  |  |  | 	union ib_gid	dgid; | 
					
						
							|  |  |  | 	u32		flow_label; | 
					
						
							|  |  |  | 	u8		sgid_index; | 
					
						
							|  |  |  | 	u8		hop_limit; | 
					
						
							|  |  |  | 	u8		traffic_class; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-07-27 11:45:34 -07:00
										 |  |  | struct ib_grh { | 
					
						
							| 
									
										
										
										
											2005-08-13 21:05:57 -07:00
										 |  |  | 	__be32		version_tclass_flow; | 
					
						
							|  |  |  | 	__be16		paylen; | 
					
						
							| 
									
										
										
										
											2005-07-27 11:45:34 -07:00
										 |  |  | 	u8		next_hdr; | 
					
						
							|  |  |  | 	u8		hop_limit; | 
					
						
							|  |  |  | 	union ib_gid	sgid; | 
					
						
							|  |  |  | 	union ib_gid	dgid; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 /* QP number used for multicast: all 24 low bits set. */
										 enum {
										 	IB_MULTICAST_QPN = 0xffffff
										 };
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-02-14 22:58:35 -08:00
										 /* Permissive LID: 0xFFFF, passed through cpu_to_be16(). */
										 #define IB_LID_PERMISSIVE	cpu_to_be16(0xFFFF)
 | 
					
						
							| 
									
										
										
										
											2005-08-13 21:05:57 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 /*
										  * Address handle flags.  IB_AH_GRH presumably marks the grh member
										  * of struct ib_ah_attr as valid -- confirm against users.
										  */
										 enum ib_ah_flags {
										 	IB_AH_GRH	= 1
										 };
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-04-10 09:43:47 -07:00
										 /*
										  * Link rate codes.  The numeric values are NOT ordered by bandwidth
										  * (for example IB_RATE_5_GBPS is 5 while IB_RATE_10_GBPS is 3), so
										  * never compare two rates by their enum value; convert them with
										  * ib_rate_to_mult() or ib_rate_to_mbps() first.
										  */
										 enum ib_rate {
										 	IB_RATE_PORT_CURRENT = 0,
										 	IB_RATE_2_5_GBPS = 2,
										 	IB_RATE_5_GBPS   = 5,
										 	IB_RATE_10_GBPS  = 3,
										 	IB_RATE_20_GBPS  = 6,
										 	IB_RATE_30_GBPS  = 4,
										 	IB_RATE_40_GBPS  = 7,
										 	IB_RATE_60_GBPS  = 8,
										 	IB_RATE_80_GBPS  = 9,
										 	IB_RATE_120_GBPS = 10,
										 	IB_RATE_14_GBPS  = 11,
										 	IB_RATE_56_GBPS  = 12,
										 	IB_RATE_112_GBPS = 13,
										 	IB_RATE_168_GBPS = 14,
										 	IB_RATE_25_GBPS  = 15,
										 	IB_RATE_100_GBPS = 16,
										 	IB_RATE_200_GBPS = 17,
										 	IB_RATE_300_GBPS = 18
										 };
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_rate_to_mult - Convert the IB rate enum to a multiple of the | 
					
						
							|  |  |  |  * base rate of 2.5 Gbit/sec.  For example, IB_RATE_5_GBPS will be | 
					
						
							|  |  |  |  * converted to 2, since 5 Gbit/sec is 2 * 2.5 Gbit/sec. | 
					
						
							|  |  |  |  * @rate: rate to convert. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | int ib_rate_to_mult(enum ib_rate rate) __attribute_const__; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-10-05 14:21:47 +03:00
										 |  |  | /**
 | 
					
						
							|  |  |  |  * ib_rate_to_mbps - Convert the IB rate enum to Mbps. | 
					
						
							|  |  |  |  * For example, IB_RATE_2_5_GBPS will be converted to 2500. | 
					
						
							|  |  |  |  * @rate: rate to convert. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | int ib_rate_to_mbps(enum ib_rate rate) __attribute_const__; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-04-10 09:43:47 -07:00
										 |  |  | /**
 | 
					
						
							|  |  |  |  * mult_to_ib_rate - Convert a multiple of 2.5 Gbit/sec to an IB rate | 
					
						
							|  |  |  |  * enum. | 
					
						
							|  |  |  |  * @mult: multiple to convert. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | enum ib_rate mult_to_ib_rate(int mult) __attribute_const__; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | struct ib_ah_attr { | 
					
						
							|  |  |  | 	struct ib_global_route	grh; | 
					
						
							|  |  |  | 	u16			dlid; | 
					
						
							|  |  |  | 	u8			sl; | 
					
						
							|  |  |  | 	u8			src_path_bits; | 
					
						
							|  |  |  | 	u8			static_rate; | 
					
						
							|  |  |  | 	u8			ah_flags; | 
					
						
							|  |  |  | 	u8			port_num; | 
					
						
							| 
									
										
											  
											
												IB/core: Ethernet L2 attributes in verbs/cm structures
This patch add the support for Ethernet L2 attributes in the
verbs/cm/cma structures.
When dealing with L2 Ethernet, we should use smac, dmac, vlan ID and priority
in a similar manner that the IB L2 (and the L4 PKEY) attributes are used.
Thus, those attributes were added to the following structures:
* ib_ah_attr - added dmac
* ib_qp_attr - added smac and vlan_id, (sl remains vlan priority)
* ib_wc - added smac, vlan_id
* ib_sa_path_rec - added smac, dmac, vlan_id
* cm_av - added smac and vlan_id
For the path record structure, extra care was taken to avoid the new
fields when packing it into wire format, so we don't break the IB CM
and SA wire protocol.
On the active side, the CM fills. its internal structures from the
path provided by the ULP.  We add there taking the ETH L2 attributes
and placing them into the CM Address Handle (struct cm_av).
On the passive side, the CM fills its internal structures from the WC
associated with the REQ message.  We add there taking the ETH L2
attributes from the WC.
When the HW driver provides the required ETH L2 attributes in the WC,
they set the IB_WC_WITH_SMAC and IB_WC_WITH_VLAN flags. The IB core
code checks for the presence of these flags, and in their absence does
address resolution from the ib_init_ah_from_wc() helper function.
ib_modify_qp_is_ok is also updated to consider the link layer. Some
parameters are mandatory for Ethernet link layer, while they are
irrelevant for IB.  Vendor drivers are modified to support the new
function signature.
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
											
										 
											2013-12-12 18:03:11 +02:00
										 |  |  | 	u8			dmac[ETH_ALEN]; | 
					
						
							|  |  |  | 	u16			vlan_id; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							/*
							 * Work completion status codes.  IB_WC_SUCCESS is 0 and every other
							 * value names an error; values are assigned sequentially, so the
							 * order of the enumerators must not change.
							 */
							enum ib_wc_status {
								IB_WC_SUCCESS,
								IB_WC_LOC_LEN_ERR,
								IB_WC_LOC_QP_OP_ERR,
								IB_WC_LOC_EEC_OP_ERR,
								IB_WC_LOC_PROT_ERR,
								IB_WC_WR_FLUSH_ERR,
								IB_WC_MW_BIND_ERR,
								IB_WC_BAD_RESP_ERR,
								IB_WC_LOC_ACCESS_ERR,
								IB_WC_REM_INV_REQ_ERR,
								IB_WC_REM_ACCESS_ERR,
								IB_WC_REM_OP_ERR,
								IB_WC_RETRY_EXC_ERR,
								IB_WC_RNR_RETRY_EXC_ERR,
								IB_WC_LOC_RDD_VIOL_ERR,
								IB_WC_REM_INV_RD_REQ_ERR,
								IB_WC_REM_ABORT_ERR,
								IB_WC_INV_EECN_ERR,
								IB_WC_INV_EEC_STATE_ERR,
								IB_WC_FATAL_ERR,
								IB_WC_RESP_TIMEOUT_ERR,
								IB_WC_GENERAL_ERR
							};
					
						
							|  |  |  | 
 | 
					
						
							/*
							 * Work completion opcodes.
							 *
							 * IB_WC_MASKED_COMP_SWAP and IB_WC_MASKED_FETCH_ADD belong to the
							 * masked atomic operations.  Masked compare-and-swap compares and
							 * swaps only the selected bits of the 64-bit target:
							 *
							 *	atomic_response = *va
							 *	if (!((compare_add ^ *va) & compare_add_mask))
							 *		*va = (*va & ~(swap_mask)) | (swap & swap_mask)
							 *	return atomic_response
							 *
							 * Masked fetch-and-add splits the target into fields and adds each
							 * field independently: a bit set in the boundary mask stops the carry
							 * from propagating into the next bit.
							 */
							enum ib_wc_opcode {
								IB_WC_SEND,
								IB_WC_RDMA_WRITE,
								IB_WC_RDMA_READ,
								IB_WC_COMP_SWAP,
								IB_WC_FETCH_ADD,
								IB_WC_BIND_MW,
								IB_WC_LSO,
								IB_WC_LOCAL_INV,
								IB_WC_FAST_REG_MR,
								IB_WC_MASKED_COMP_SWAP,
								IB_WC_MASKED_FETCH_ADD,
							/*
							 * Set value of IB_WC_RECV so consumers can test if a completion is a
							 * receive by testing (opcode & IB_WC_RECV).
							 */
								IB_WC_RECV			= 1 << 7,
								IB_WC_RECV_RDMA_WITH_IMM
							};
					
						
							|  |  |  | 
 | 
					
						
							/*
							 * Work completion flag bits.
							 *
							 * A HW driver sets IB_WC_WITH_SMAC and IB_WC_WITH_VLAN when it
							 * provides the Ethernet L2 attributes in the work completion.  When
							 * they are absent, the IB core does address resolution from the
							 * ib_init_ah_from_wc() helper instead.
							 */
							enum ib_wc_flags {
								IB_WC_GRH		= 1,
								IB_WC_WITH_IMM		= (1<<1),
								IB_WC_WITH_INVALIDATE	= (1<<2),
								IB_WC_IP_CSUM_OK	= (1<<3),
								IB_WC_WITH_SMAC		= (1<<4),
								IB_WC_WITH_VLAN		= (1<<5),
							};
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct ib_wc { | 
					
						
							|  |  |  | 	u64			wr_id; | 
					
						
							|  |  |  | 	enum ib_wc_status	status; | 
					
						
							|  |  |  | 	enum ib_wc_opcode	opcode; | 
					
						
							|  |  |  | 	u32			vendor_err; | 
					
						
							|  |  |  | 	u32			byte_len; | 
					
						
							| 
									
										
										
										
											2006-12-31 21:09:42 +02:00
										 |  |  | 	struct ib_qp	       *qp; | 
					
						
							| 
									
										
										
										
											2008-07-14 23:48:45 -07:00
										 |  |  | 	union { | 
					
						
							|  |  |  | 		__be32		imm_data; | 
					
						
							|  |  |  | 		u32		invalidate_rkey; | 
					
						
							|  |  |  | 	} ex; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	u32			src_qp; | 
					
						
							|  |  |  | 	int			wc_flags; | 
					
						
							|  |  |  | 	u16			pkey_index; | 
					
						
							|  |  |  | 	u16			slid; | 
					
						
							|  |  |  | 	u8			sl; | 
					
						
							|  |  |  | 	u8			dlid_path_bits; | 
					
						
							|  |  |  | 	u8			port_num;	/* valid only for DR SMPs on switches */ | 
					
						
							| 
									
										
											  
											
												IB/core: Ethernet L2 attributes in verbs/cm structures
This patch add the support for Ethernet L2 attributes in the
verbs/cm/cma structures.
When dealing with L2 Ethernet, we should use smac, dmac, vlan ID and priority
in a similar manner that the IB L2 (and the L4 PKEY) attributes are used.
Thus, those attributes were added to the following structures:
* ib_ah_attr - added dmac
* ib_qp_attr - added smac and vlan_id, (sl remains vlan priority)
* ib_wc - added smac, vlan_id
* ib_sa_path_rec - added smac, dmac, vlan_id
* cm_av - added smac and vlan_id
For the path record structure, extra care was taken to avoid the new
fields when packing it into wire format, so we don't break the IB CM
and SA wire protocol.
On the active side, the CM fills. its internal structures from the
path provided by the ULP.  We add there taking the ETH L2 attributes
and placing them into the CM Address Handle (struct cm_av).
On the passive side, the CM fills its internal structures from the WC
associated with the REQ message.  We add there taking the ETH L2
attributes from the WC.
When the HW driver provides the required ETH L2 attributes in the WC,
they set the IB_WC_WITH_SMAC and IB_WC_WITH_VLAN flags. The IB core
code checks for the presence of these flags, and in their absence does
address resolution from the ib_init_ah_from_wc() helper function.
ib_modify_qp_is_ok is also updated to consider the link layer. Some
parameters are mandatory for Ethernet link layer, while they are
irrelevant for IB.  Vendor drivers are modified to support the new
function signature.
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
											
										 
											2013-12-12 18:03:11 +02:00
										 |  |  | 	u8			smac[ETH_ALEN]; | 
					
						
							|  |  |  | 	u16			vlan_id; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												IB: Return "maybe missed event" hint from ib_req_notify_cq()
The semantics defined by the InfiniBand specification say that
completion events are only generated when a completion is added to a
completion queue (CQ) after completion notification is requested.  In
other words, this means that the following race is possible:
	while (CQ is not empty)
		ib_poll_cq(CQ);
	// new completion is added after while loop is exited
	ib_req_notify_cq(CQ);
	// no event is generated for the existing completion
To close this race, the IB spec recommends doing another poll of the
CQ after requesting notification.
However, it is not always possible to arrange code this way (for
example, we have found that NAPI for IPoIB cannot poll after
requesting notification).  Also, some hardware (eg Mellanox HCAs)
actually will generate an event for completions added before the call
to ib_req_notify_cq() -- which is allowed by the spec, since there's
no way for any upper-layer consumer to know exactly when a completion
was really added -- so the extra poll of the CQ is just a waste.
Motivated by this, we add a new flag "IB_CQ_REPORT_MISSED_EVENTS" for
ib_req_notify_cq() so that it can return a hint about whether a
completion may have been added before the request for notification.
The return value of ib_req_notify_cq() is extended so:
	 < 0	means an error occurred while requesting notification
	== 0	means notification was requested successfully, and if
		IB_CQ_REPORT_MISSED_EVENTS was passed in, then no
		events were missed and it is safe to wait for another
		event.
	 > 0	is only returned if IB_CQ_REPORT_MISSED_EVENTS was
		passed in.  It means that the consumer must poll the
		CQ again to make sure it is empty to avoid the race
		described above.
We add a flag to enable this behavior rather than turning it on
unconditionally, because checking for missed events may incur
significant overhead for some low-level drivers, and consumers that
don't care about the results of this test shouldn't be forced to pay
for the test.
Signed-off-by: Roland Dreier <rolandd@cisco.com>
											
										 
											2007-05-06 21:02:48 -07:00
										 /*
										  * Flags for ib_req_notify_cq().
										  *
										  * IB_CQ_SOLICITED_MASK covers the two notification-type bits
										  * (IB_CQ_SOLICITED | IB_CQ_NEXT_COMP).
										  *
										  * IB_CQ_REPORT_MISSED_EVENTS asks ib_req_notify_cq() to return a hint
										  * about completions added before notification was requested:
										  *	 < 0	an error occurred while requesting notification
										  *	== 0	notification was requested and no events were missed
										  *	 > 0	the consumer must poll the CQ again to make sure it
										  *		is empty
										  * The flag is opt-in because checking for missed events may incur
										  * significant overhead for some low-level drivers.
										  */
										 enum ib_cq_notify_flags {
										 	IB_CQ_SOLICITED			= 1 << 0,
										 	IB_CQ_NEXT_COMP			= 1 << 1,
										 	IB_CQ_SOLICITED_MASK		= IB_CQ_SOLICITED | IB_CQ_NEXT_COMP,
										 	IB_CQ_REPORT_MISSED_EVENTS	= 1 << 2,
										 };
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-05-23 16:31:36 -07:00
										 /* Shared receive queue kinds: basic (0) or XRC (1). */
										 enum ib_srq_type {
										 	IB_SRQT_BASIC,
										 	IB_SRQT_XRC
										 };
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-08-18 12:23:08 -07:00
										 /*
										  * Bit mask naming SRQ attributes; the names match the max_wr and
										  * srq_limit members of struct ib_srq_attr.
										  */
										 enum ib_srq_attr_mask {
										 	IB_SRQ_MAX_WR	= 1 << 0,
										 	IB_SRQ_LIMIT	= 1 << 1,
										 };
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct ib_srq_attr { | 
					
						
							|  |  |  | 	u32	max_wr; | 
					
						
							|  |  |  | 	u32	max_sge; | 
					
						
							|  |  |  | 	u32	srq_limit; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct ib_srq_init_attr { | 
					
						
							|  |  |  | 	void		      (*event_handler)(struct ib_event *, void *); | 
					
						
							|  |  |  | 	void		       *srq_context; | 
					
						
							|  |  |  | 	struct ib_srq_attr	attr; | 
					
						
							| 
									
										
										
										
											2011-05-23 16:31:36 -07:00
										 |  |  | 	enum ib_srq_type	srq_type; | 
					
						
							| 
									
										
										
										
											2011-05-23 19:42:29 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	union { | 
					
						
							|  |  |  | 		struct { | 
					
						
							|  |  |  | 			struct ib_xrcd *xrcd; | 
					
						
							|  |  |  | 			struct ib_cq   *cq; | 
					
						
							|  |  |  | 		} xrc; | 
					
						
							|  |  |  | 	} ext; | 
					
						
							| 
									
										
										
										
											2005-08-18 12:23:08 -07:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | struct ib_qp_cap { | 
					
						
							|  |  |  | 	u32	max_send_wr; | 
					
						
							|  |  |  | 	u32	max_recv_wr; | 
					
						
							|  |  |  | 	u32	max_send_sge; | 
					
						
							|  |  |  | 	u32	max_recv_sge; | 
					
						
							|  |  |  | 	u32	max_inline_data; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							/* Send queue signaling mode: IB_SIGNAL_ALL_WR (0) or IB_SIGNAL_REQ_WR (1). */
							enum ib_sig_type {
								IB_SIGNAL_ALL_WR,
								IB_SIGNAL_REQ_WR
							};
					
						
							|  |  |  | 
 | 
					
						
							/*
							 * Queue pair types.  Value 7 is not assigned (IB_QPT_RAW_ETHERTYPE
							 * is 6 and IB_QPT_RAW_PACKET is pinned to 8), and IB_QPT_MAX follows
							 * the last core-visible type.
							 */
							enum ib_qp_type {
								/*
								 * IB_QPT_SMI and IB_QPT_GSI have to be the first two entries
								 * here (and in that order) since the MAD layer uses them as
								 * indices into a 2-entry table.
								 */
								IB_QPT_SMI,
								IB_QPT_GSI,

								IB_QPT_RC,
								IB_QPT_UC,
								IB_QPT_UD,
								IB_QPT_RAW_IPV6,
								IB_QPT_RAW_ETHERTYPE,
								IB_QPT_RAW_PACKET = 8,
								IB_QPT_XRC_INI = 9,
								IB_QPT_XRC_TGT,
								IB_QPT_MAX,
								/* Reserve a range for qp types internal to the low level driver.
								 * These qp types will not be visible at the IB core layer, so the
								 * IB_QPT_MAX usages should not be affected in the core layer
								 */
								IB_QPT_RESERVED1 = 0x1000,
								IB_QPT_RESERVED2,
								IB_QPT_RESERVED3,
								IB_QPT_RESERVED4,
								IB_QPT_RESERVED5,
								IB_QPT_RESERVED6,
								IB_QPT_RESERVED7,
								IB_QPT_RESERVED8,
								IB_QPT_RESERVED9,
								IB_QPT_RESERVED10,
							};
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-04-16 21:09:27 -07:00
										 /*
										  * Queue pair creation flag bits.  Bits 0, 1 and 5 are assigned here;
										  * the RESERVED_START/RESERVED_END pair brackets the bit range set
										  * aside for low level drivers.
										  */
										 enum ib_qp_create_flags {
										 	IB_QP_CREATE_IPOIB_UD_LSO		= 1 << 0,
										 	IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK	= 1 << 1,
										 	IB_QP_CREATE_NETIF_QP			= 1 << 5,
										 	/* reserve bits 26-31 for low level drivers' internal use */
										 	IB_QP_CREATE_RESERVED_START		= 1 << 26,
										 	IB_QP_CREATE_RESERVED_END		= 1 << 31,
										 };
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-08-01 18:49:53 +03:00
										 |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * Note: users may not call ib_close_qp or ib_destroy_qp from the event_handler | 
					
						
							|  |  |  |  * callback to destroy the passed in QP. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | struct ib_qp_init_attr { | 
					
						
							|  |  |  | 	void                  (*event_handler)(struct ib_event *, void *); | 
					
						
							|  |  |  | 	void		       *qp_context; | 
					
						
							|  |  |  | 	struct ib_cq	       *send_cq; | 
					
						
							|  |  |  | 	struct ib_cq	       *recv_cq; | 
					
						
							|  |  |  | 	struct ib_srq	       *srq; | 
					
						
							| 
									
										
										
										
											2011-05-23 19:59:25 -07:00
										 |  |  | 	struct ib_xrcd	       *xrcd;     /* XRC TGT QPs only */ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	struct ib_qp_cap	cap; | 
					
						
							|  |  |  | 	enum ib_sig_type	sq_sig_type; | 
					
						
							|  |  |  | 	enum ib_qp_type		qp_type; | 
					
						
							| 
									
										
										
										
											2008-04-16 21:09:27 -07:00
										 |  |  | 	enum ib_qp_create_flags	create_flags; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	u8			port_num; /* special QP types only */ | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-08-08 15:31:51 -07:00
										 |  |  | struct ib_qp_open_attr { | 
					
						
							|  |  |  | 	void                  (*event_handler)(struct ib_event *, void *); | 
					
						
							|  |  |  | 	void		       *qp_context; | 
					
						
							|  |  |  | 	u32			qp_num; | 
					
						
							|  |  |  | 	enum ib_qp_type		qp_type; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 /*
 * RNR timer encodings, values 0..31.
 *
 * Each constant name carries the timer duration as IB_RNR_TIMER_<int>_<frac>
 * (presumably milliseconds, e.g. 000_01 = 0.01 -- TODO confirm against the
 * IB specification).  Note that encoding 0 is the longest duration (655.36),
 * not the shortest.
 */
enum ib_rnr_timeout {
	IB_RNR_TIMER_655_36 =  0,
	IB_RNR_TIMER_000_01 =  1,
	IB_RNR_TIMER_000_02 =  2,
	IB_RNR_TIMER_000_03 =  3,
	IB_RNR_TIMER_000_04 =  4,
	IB_RNR_TIMER_000_06 =  5,
	IB_RNR_TIMER_000_08 =  6,
	IB_RNR_TIMER_000_12 =  7,
	IB_RNR_TIMER_000_16 =  8,
	IB_RNR_TIMER_000_24 =  9,
	IB_RNR_TIMER_000_32 = 10,
	IB_RNR_TIMER_000_48 = 11,
	IB_RNR_TIMER_000_64 = 12,
	IB_RNR_TIMER_000_96 = 13,
	IB_RNR_TIMER_001_28 = 14,
	IB_RNR_TIMER_001_92 = 15,
	IB_RNR_TIMER_002_56 = 16,
	IB_RNR_TIMER_003_84 = 17,
	IB_RNR_TIMER_005_12 = 18,
	IB_RNR_TIMER_007_68 = 19,
	IB_RNR_TIMER_010_24 = 20,
	IB_RNR_TIMER_015_36 = 21,
	IB_RNR_TIMER_020_48 = 22,
	IB_RNR_TIMER_030_72 = 23,
	IB_RNR_TIMER_040_96 = 24,
	IB_RNR_TIMER_061_44 = 25,
	IB_RNR_TIMER_081_92 = 26,
	IB_RNR_TIMER_122_88 = 27,
	IB_RNR_TIMER_163_84 = 28,
	IB_RNR_TIMER_245_76 = 29,
	IB_RNR_TIMER_327_68 = 30,
	IB_RNR_TIMER_491_52 = 31
};
					
						
							|  |  |  | 
 | 
					
						
							/*
 * Bit mask naming individual QP attributes, one bit per attribute
 * (bits 0..24).  Most names parallel members of struct ib_qp_attr.
 */
enum ib_qp_attr_mask {
	IB_QP_STATE			= 1,
	IB_QP_CUR_STATE			= (1<<1),
	IB_QP_EN_SQD_ASYNC_NOTIFY	= (1<<2),
	IB_QP_ACCESS_FLAGS		= (1<<3),
	IB_QP_PKEY_INDEX		= (1<<4),
	IB_QP_PORT			= (1<<5),
	IB_QP_QKEY			= (1<<6),
	IB_QP_AV			= (1<<7),
	IB_QP_PATH_MTU			= (1<<8),
	IB_QP_TIMEOUT			= (1<<9),
	IB_QP_RETRY_CNT			= (1<<10),
	IB_QP_RNR_RETRY			= (1<<11),
	IB_QP_RQ_PSN			= (1<<12),
	IB_QP_MAX_QP_RD_ATOMIC		= (1<<13),
	IB_QP_ALT_PATH			= (1<<14),
	IB_QP_MIN_RNR_TIMER		= (1<<15),
	IB_QP_SQ_PSN			= (1<<16),
	IB_QP_MAX_DEST_RD_ATOMIC	= (1<<17),
	IB_QP_PATH_MIG_STATE		= (1<<18),
	IB_QP_CAP			= (1<<19),
	IB_QP_DEST_QPN			= (1<<20),
	/* Ethernet L2 attributes: source MAC and VLAN id, plus alt_ variants */
	IB_QP_SMAC			= (1<<21),
	IB_QP_ALT_SMAC			= (1<<22),
	IB_QP_VID			= (1<<23),
	IB_QP_ALT_VID			= (1<<24),
};
					
						
							|  |  |  | 
 | 
					
						
							/*
 * QP states, numbered implicitly from 0 (IB_QPS_RESET) to 6 (IB_QPS_ERR).
 * Used for the qp_state and cur_qp_state members of struct ib_qp_attr.
 */
enum ib_qp_state {
	IB_QPS_RESET,
	IB_QPS_INIT,
	IB_QPS_RTR,
	IB_QPS_RTS,
	IB_QPS_SQD,
	IB_QPS_SQE,
	IB_QPS_ERR
};
					
						
							|  |  |  | 
 | 
					
						
							/*
 * Path migration states, numbered implicitly from 0.
 * Used for the path_mig_state member of struct ib_qp_attr.
 */
enum ib_mig_state {
	IB_MIG_MIGRATED,
	IB_MIG_REARM,
	IB_MIG_ARMED
};
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-02-06 16:19:12 +00:00
										 /* Memory window types; the numeric value equals the type number. */
enum ib_mw_type {
	IB_MW_TYPE_1 = 1,
	IB_MW_TYPE_2 = 2
};
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | struct ib_qp_attr { | 
					
						
							|  |  |  | 	enum ib_qp_state	qp_state; | 
					
						
							|  |  |  | 	enum ib_qp_state	cur_qp_state; | 
					
						
							|  |  |  | 	enum ib_mtu		path_mtu; | 
					
						
							|  |  |  | 	enum ib_mig_state	path_mig_state; | 
					
						
							|  |  |  | 	u32			qkey; | 
					
						
							|  |  |  | 	u32			rq_psn; | 
					
						
							|  |  |  | 	u32			sq_psn; | 
					
						
							|  |  |  | 	u32			dest_qp_num; | 
					
						
							|  |  |  | 	int			qp_access_flags; | 
					
						
							|  |  |  | 	struct ib_qp_cap	cap; | 
					
						
							|  |  |  | 	struct ib_ah_attr	ah_attr; | 
					
						
							|  |  |  | 	struct ib_ah_attr	alt_ah_attr; | 
					
						
							|  |  |  | 	u16			pkey_index; | 
					
						
							|  |  |  | 	u16			alt_pkey_index; | 
					
						
							|  |  |  | 	u8			en_sqd_async_notify; | 
					
						
							|  |  |  | 	u8			sq_draining; | 
					
						
							|  |  |  | 	u8			max_rd_atomic; | 
					
						
							|  |  |  | 	u8			max_dest_rd_atomic; | 
					
						
							|  |  |  | 	u8			min_rnr_timer; | 
					
						
							|  |  |  | 	u8			port_num; | 
					
						
							|  |  |  | 	u8			timeout; | 
					
						
							|  |  |  | 	u8			retry_cnt; | 
					
						
							|  |  |  | 	u8			rnr_retry; | 
					
						
							|  |  |  | 	u8			alt_port_num; | 
					
						
							|  |  |  | 	u8			alt_timeout; | 
					
						
							| 
									
										
											  
											
												IB/core: Ethernet L2 attributes in verbs/cm structures
This patch add the support for Ethernet L2 attributes in the
verbs/cm/cma structures.
When dealing with L2 Ethernet, we should use smac, dmac, vlan ID and priority
in a similar manner that the IB L2 (and the L4 PKEY) attributes are used.
Thus, those attributes were added to the following structures:
* ib_ah_attr - added dmac
* ib_qp_attr - added smac and vlan_id, (sl remains vlan priority)
* ib_wc - added smac, vlan_id
* ib_sa_path_rec - added smac, dmac, vlan_id
* cm_av - added smac and vlan_id
For the path record structure, extra care was taken to avoid the new
fields when packing it into wire format, so we don't break the IB CM
and SA wire protocol.
On the active side, the CM fills. its internal structures from the
path provided by the ULP.  We add there taking the ETH L2 attributes
and placing them into the CM Address Handle (struct cm_av).
On the passive side, the CM fills its internal structures from the WC
associated with the REQ message.  We add there taking the ETH L2
attributes from the WC.
When the HW driver provides the required ETH L2 attributes in the WC,
they set the IB_WC_WITH_SMAC and IB_WC_WITH_VLAN flags. The IB core
code checks for the presence of these flags, and in their absence does
address resolution from the ib_init_ah_from_wc() helper function.
ib_modify_qp_is_ok is also updated to consider the link layer. Some
parameters are mandatory for Ethernet link layer, while they are
irrelevant for IB.  Vendor drivers are modified to support the new
function signature.
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
											
										 
											2013-12-12 18:03:11 +02:00
										 |  |  | 	u8			smac[ETH_ALEN]; | 
					
						
							|  |  |  | 	u8			alt_smac[ETH_ALEN]; | 
					
						
							|  |  |  | 	u16			vlan_id; | 
					
						
							|  |  |  | 	u16			alt_vlan_id; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							/*
 * Send work request opcodes, stored in the opcode member of
 * struct ib_send_wr.  The core opcodes are numbered implicitly from 0
 * (IB_WR_RDMA_WRITE) to 14 (IB_WR_BIND_MW); the driver-reserved range
 * starts at 0xf0.
 */
enum ib_wr_opcode {
	IB_WR_RDMA_WRITE,
	IB_WR_RDMA_WRITE_WITH_IMM,
	IB_WR_SEND,
	IB_WR_SEND_WITH_IMM,
	IB_WR_RDMA_READ,
	IB_WR_ATOMIC_CMP_AND_SWP,
	IB_WR_ATOMIC_FETCH_AND_ADD,
	IB_WR_LSO,
	IB_WR_SEND_WITH_INV,
	IB_WR_RDMA_READ_WITH_INV,
	IB_WR_LOCAL_INV,
	IB_WR_FAST_REG_MR,
	/*
	 * Masked atomics: extensions of compare-and-swap and fetch-and-add
	 * that take additional mask operands (compare_add_mask and swap_mask
	 * in the atomic member of struct ib_send_wr).
	 */
	IB_WR_MASKED_ATOMIC_CMP_AND_SWP,
	IB_WR_MASKED_ATOMIC_FETCH_AND_ADD,
	IB_WR_BIND_MW,
	/* reserve values for low level drivers' internal use.
	 * These values will not be used at all in the ib core layer.
	 */
	IB_WR_RESERVED1 = 0xf0,
	IB_WR_RESERVED2,
	IB_WR_RESERVED3,
	IB_WR_RESERVED4,
	IB_WR_RESERVED5,
	IB_WR_RESERVED6,
	IB_WR_RESERVED7,
	IB_WR_RESERVED8,
	IB_WR_RESERVED9,
	IB_WR_RESERVED10,
};
					
						
							|  |  |  | 
 | 
					
						
							/*
 * Bit flags for send work requests (see the send_flags member of
 * struct ib_send_wr and struct ib_mw_bind).
 *
 * NOTE(review): IB_SEND_RESERVED_END is written as 1 << 31, which shifts
 * into the sign bit of a 32-bit int; the expression is kept as-is so the
 * constant's value does not change.
 */
enum ib_send_flags {
	IB_SEND_FENCE		= 1,
	IB_SEND_SIGNALED	= (1<<1),
	IB_SEND_SOLICITED	= (1<<2),
	IB_SEND_INLINE		= (1<<3),
	IB_SEND_IP_CSUM		= (1<<4),

	/* reserve bits 26-31 for low level drivers' internal use */
	IB_SEND_RESERVED_START	= (1 << 26),
	IB_SEND_RESERVED_END	= (1 << 31),
};
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct ib_sge { | 
					
						
							|  |  |  | 	u64	addr; | 
					
						
							|  |  |  | 	u32	length; | 
					
						
							|  |  |  | 	u32	lkey; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-07-14 23:48:45 -07:00
										 |  |  | struct ib_fast_reg_page_list { | 
					
						
							|  |  |  | 	struct ib_device       *device; | 
					
						
							|  |  |  | 	u64		       *page_list; | 
					
						
							|  |  |  | 	unsigned int		max_page_list_len; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-02-06 16:19:12 +00:00
										 |  |  | /**
 | 
					
						
							|  |  |  |  * struct ib_mw_bind_info - Parameters for a memory window bind operation. | 
					
						
							|  |  |  |  * @mr: A memory region to bind the memory window to. | 
					
						
							|  |  |  |  * @addr: The address where the memory window should begin. | 
					
						
							|  |  |  |  * @length: The length of the memory window, in bytes. | 
					
						
							|  |  |  |  * @mw_access_flags: Access flags from enum ib_access_flags for the window. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * This struct contains the shared parameters for type 1 and type 2 | 
					
						
							|  |  |  |  * memory window bind operations. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | struct ib_mw_bind_info { | 
					
						
							|  |  |  | 	struct ib_mr   *mr; | 
					
						
							|  |  |  | 	u64		addr; | 
					
						
							|  |  |  | 	u64		length; | 
					
						
							|  |  |  | 	int		mw_access_flags; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | struct ib_send_wr { | 
					
						
							|  |  |  | 	struct ib_send_wr      *next; | 
					
						
							|  |  |  | 	u64			wr_id; | 
					
						
							|  |  |  | 	struct ib_sge	       *sg_list; | 
					
						
							|  |  |  | 	int			num_sge; | 
					
						
							|  |  |  | 	enum ib_wr_opcode	opcode; | 
					
						
							|  |  |  | 	int			send_flags; | 
					
						
							| 
									
										
										
										
											2008-04-16 21:09:32 -07:00
										 |  |  | 	union { | 
					
						
							|  |  |  | 		__be32		imm_data; | 
					
						
							|  |  |  | 		u32		invalidate_rkey; | 
					
						
							|  |  |  | 	} ex; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	union { | 
					
						
							|  |  |  | 		struct { | 
					
						
							|  |  |  | 			u64	remote_addr; | 
					
						
							|  |  |  | 			u32	rkey; | 
					
						
							|  |  |  | 		} rdma; | 
					
						
							|  |  |  | 		struct { | 
					
						
							|  |  |  | 			u64	remote_addr; | 
					
						
							|  |  |  | 			u64	compare_add; | 
					
						
							|  |  |  | 			u64	swap; | 
					
						
							| 
									
										
											  
											
												IB/core: Add support for masked atomic operations
 - Add new IB_WR_MASKED_ATOMIC_CMP_AND_SWP and IB_WR_MASKED_ATOMIC_FETCH_AND_ADD
   send opcodes that can be used to post "masked atomic compare and
   swap" and "masked atomic fetch and add" work request respectively.
 - Add masked_atomic_cap capability.
 - Add mask fields to atomic struct of ib_send_wr
 - Add new opcodes to ib_wc_opcode
The new operations are described more precisely below:
* Masked Compare and Swap (MskCmpSwap)
The MskCmpSwap atomic operation is an extension to the CmpSwap
operation defined in the IB spec.  MskCmpSwap allows the user to
select a portion of the 64 bit target data for the “compare” check as
well as to restrict the swap to a (possibly different) portion.  The
pseudo code below describes the operation:
| atomic_response = *va
| if (!((compare_add ^ *va) & compare_add_mask)) then
|     *va = (*va & ~(swap_mask)) | (swap & swap_mask)
|
| return atomic_response
The additional operands are carried in the Extended Transport Header.
Atomic response generation and packet format for MskCmpSwap is as for
standard IB Atomic operations.
* Masked Fetch and Add (MFetchAdd)
The MFetchAdd Atomic operation extends the functionality of the
standard IB FetchAdd by allowing the user to split the target into
multiple fields of selectable length. The atomic add is done
independently on each one of this fields. A bit set in the
field_boundary parameter specifies the field boundaries. The pseudo
code below describes the operation:
| bit_adder(ci, b1, b2, *co)
| {
|	value = ci + b1 + b2
|	*co = !!(value & 2)
|
|	return value & 1
| }
|
| #define MASK_IS_SET(mask, attr)      (!!((mask)&(attr)))
| bit_position = 1
| carry = 0
| atomic_response = 0
|
| for i = 0 to 63
| {
|         if ( i != 0 )
|                 bit_position =  bit_position << 1
|
|         bit_add_res = bit_adder(carry, MASK_IS_SET(*va, bit_position),
|                                 MASK_IS_SET(compare_add, bit_position), &new_carry)
|         if (bit_add_res)
|                 atomic_response |= bit_position
|
|         carry = ((new_carry) && (!MASK_IS_SET(compare_add_mask, bit_position)))
| }
|
| return atomic_response
Signed-off-by: Vladimir Sokolovsky <vlad@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
											
										 
											2010-04-14 17:23:01 +03:00
										 |  |  | 			u64	compare_add_mask; | 
					
						
							|  |  |  | 			u64	swap_mask; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 			u32	rkey; | 
					
						
							|  |  |  | 		} atomic; | 
					
						
							|  |  |  | 		struct { | 
					
						
							|  |  |  | 			struct ib_ah *ah; | 
					
						
							| 
									
										
										
										
											2008-04-16 21:09:27 -07:00
										 |  |  | 			void   *header; | 
					
						
							|  |  |  | 			int     hlen; | 
					
						
							|  |  |  | 			int     mss; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 			u32	remote_qpn; | 
					
						
							|  |  |  | 			u32	remote_qkey; | 
					
						
							|  |  |  | 			u16	pkey_index; /* valid for GSI only */ | 
					
						
							|  |  |  | 			u8	port_num;   /* valid for DR SMPs on switch only */ | 
					
						
							|  |  |  | 		} ud; | 
					
						
							| 
									
										
										
										
											2008-07-14 23:48:45 -07:00
										 |  |  | 		struct { | 
					
						
							|  |  |  | 			u64				iova_start; | 
					
						
							|  |  |  | 			struct ib_fast_reg_page_list   *page_list; | 
					
						
							|  |  |  | 			unsigned int			page_shift; | 
					
						
							|  |  |  | 			unsigned int			page_list_len; | 
					
						
							|  |  |  | 			u32				length; | 
					
						
							|  |  |  | 			int				access_flags; | 
					
						
							|  |  |  | 			u32				rkey; | 
					
						
							|  |  |  | 		} fast_reg; | 
					
						
							| 
									
										
										
										
											2013-02-06 16:19:12 +00:00
										 |  |  | 		struct { | 
					
						
							|  |  |  | 			struct ib_mw            *mw; | 
					
						
							|  |  |  | 			/* The new rkey for the memory window. */ | 
					
						
							|  |  |  | 			u32                      rkey; | 
					
						
							|  |  |  | 			struct ib_mw_bind_info   bind_info; | 
					
						
							|  |  |  | 		} bind_mw; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	} wr; | 
					
						
							| 
									
										
										
										
											2011-05-23 19:59:25 -07:00
										 |  |  | 	u32			xrc_remote_srq_num;	/* XRC TGT QPs only */ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct ib_recv_wr { | 
					
						
							|  |  |  | 	struct ib_recv_wr      *next; | 
					
						
							|  |  |  | 	u64			wr_id; | 
					
						
							|  |  |  | 	struct ib_sge	       *sg_list; | 
					
						
							|  |  |  | 	int			num_sge; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							/*
 * Access permission bit flags (bits 0..5).  Referenced by the
 * mw_access_flags member of struct ib_mw_bind_info.
 */
enum ib_access_flags {
	IB_ACCESS_LOCAL_WRITE	= 1,
	IB_ACCESS_REMOTE_WRITE	= (1<<1),
	IB_ACCESS_REMOTE_READ	= (1<<2),
	IB_ACCESS_REMOTE_ATOMIC	= (1<<3),
	IB_ACCESS_MW_BIND	= (1<<4),
	IB_ZERO_BASED		= (1<<5)
};
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct ib_phys_buf { | 
					
						
							|  |  |  | 	u64      addr; | 
					
						
							|  |  |  | 	u64      size; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct ib_mr_attr { | 
					
						
							|  |  |  | 	struct ib_pd	*pd; | 
					
						
							|  |  |  | 	u64		device_virt_addr; | 
					
						
							|  |  |  | 	u64		size; | 
					
						
							|  |  |  | 	int		mr_access_flags; | 
					
						
							|  |  |  | 	u32		lkey; | 
					
						
							|  |  |  | 	u32		rkey; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							/*
 * Bit flags naming the parts of a memory region affected by a
 * re-registration: translation, PD and access flags.
 */
enum ib_mr_rereg_flags {
	IB_MR_REREG_TRANS	= 1,
	IB_MR_REREG_PD		= (1<<1),
	IB_MR_REREG_ACCESS	= (1<<2)
};
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-02-06 16:19:12 +00:00
										 |  |  | /**
 | 
					
						
							|  |  |  |  * struct ib_mw_bind - Parameters for a type 1 memory window bind operation. | 
					
						
							|  |  |  |  * @wr_id:      Work request id. | 
					
						
							|  |  |  |  * @send_flags: Flags from ib_send_flags enum. | 
					
						
							|  |  |  |  * @bind_info:  More parameters of the bind operation. | 
					
						
							|  |  |  |  */ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | struct ib_mw_bind { | 
					
						
							| 
									
										
										
										
											2013-02-06 16:19:12 +00:00
										 |  |  | 	u64                    wr_id; | 
					
						
							|  |  |  | 	int                    send_flags; | 
					
						
							|  |  |  | 	struct ib_mw_bind_info bind_info; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct ib_fmr_attr { | 
					
						
							|  |  |  | 	int	max_pages; | 
					
						
							|  |  |  | 	int	max_maps; | 
					
						
							| 
									
										
										
										
											2006-02-02 10:43:45 -08:00
										 |  |  | 	u8	page_shift; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-07-07 17:57:10 -07:00
										 |  |  | struct ib_ucontext { | 
					
						
							|  |  |  | 	struct ib_device       *device; | 
					
						
							|  |  |  | 	struct list_head	pd_list; | 
					
						
							|  |  |  | 	struct list_head	mr_list; | 
					
						
							|  |  |  | 	struct list_head	mw_list; | 
					
						
							|  |  |  | 	struct list_head	cq_list; | 
					
						
							|  |  |  | 	struct list_head	qp_list; | 
					
						
							|  |  |  | 	struct list_head	srq_list; | 
					
						
							|  |  |  | 	struct list_head	ah_list; | 
					
						
							| 
									
										
										
										
											2011-05-24 08:33:46 -07:00
										 |  |  | 	struct list_head	xrcd_list; | 
					
						
							| 
									
										
										
										
											2013-08-14 13:58:30 +03:00
										 |  |  | 	struct list_head	rule_list; | 
					
						
							| 
									
										
										
										
											2007-03-04 16:15:11 -08:00
										 |  |  | 	int			closing; | 
					
						
							| 
									
										
										
										
											2005-07-07 17:57:10 -07:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct ib_uobject { | 
					
						
							|  |  |  | 	u64			user_handle;	/* handle given to us by userspace */ | 
					
						
							|  |  |  | 	struct ib_ucontext     *context;	/* associated user context */ | 
					
						
							| 
									
										
											  
											
												IB/uverbs: Don't serialize with ib_uverbs_idr_mutex
Currently, all userspace verbs operations that call into the kernel
are serialized by ib_uverbs_idr_mutex.  This can be a scalability
issue for some workloads, especially for devices driven by the ipath
driver, which needs to call into the kernel even for datapath
operations.
Fix this by adding reference counts to the userspace objects, and then
converting ib_uverbs_idr_mutex into a spinlock that only protects the
idrs long enough to take a reference on the object being looked up.
Because remove operations may fail, we have to do a slightly funky
two-step deletion, which is described in the comments at the top of
uverbs_cmd.c.
This also still leaves ib_uverbs_idr_lock as a single lock that is
possibly subject to contention.  However, the lock hold time will only
be a single idr operation, so multiple threads should still be able to
make progress, even if ib_uverbs_idr_lock is being ping-ponged.
Surprisingly, these changes even shrink the object code:
add/remove: 23/5 grow/shrink: 4/21 up/down: 633/-693 (-60)
Signed-off-by: Roland Dreier <rolandd@cisco.com>
											
										 
											2006-06-17 20:44:49 -07:00
										 |  |  | 	void		       *object;		/* containing object */ | 
					
						
							| 
									
										
										
										
											2005-07-07 17:57:10 -07:00
										 |  |  | 	struct list_head	list;		/* link to context's list */ | 
					
						
							| 
									
										
										
										
											2008-04-16 21:01:06 -07:00
										 |  |  | 	int			id;		/* index into kernel idr */ | 
					
						
							| 
									
										
											  
											
												IB/uverbs: Don't serialize with ib_uverbs_idr_mutex
Currently, all userspace verbs operations that call into the kernel
are serialized by ib_uverbs_idr_mutex.  This can be a scalability
issue for some workloads, especially for devices driven by the ipath
driver, which needs to call into the kernel even for datapath
operations.
Fix this by adding reference counts to the userspace objects, and then
converting ib_uverbs_idr_mutex into a spinlock that only protects the
idrs long enough to take a reference on the object being looked up.
Because remove operations may fail, we have to do a slightly funky
two-step deletion, which is described in the comments at the top of
uverbs_cmd.c.
This also still leaves ib_uverbs_idr_lock as a single lock that is
possibly subject to contention.  However, the lock hold time will only
be a single idr operation, so multiple threads should still be able to
make progress, even if ib_uverbs_idr_lock is being ping-ponged.
Surprisingly, these changes even shrink the object code:
add/remove: 23/5 grow/shrink: 4/21 up/down: 633/-693 (-60)
Signed-off-by: Roland Dreier <rolandd@cisco.com>
											
										 
											2006-06-17 20:44:49 -07:00
										 |  |  | 	struct kref		ref; | 
					
						
							|  |  |  | 	struct rw_semaphore	mutex;		/* protects .live */ | 
					
						
							|  |  |  | 	int			live; | 
					
						
							| 
									
										
										
										
											2005-07-07 17:57:10 -07:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct ib_udata { | 
					
						
							| 
									
										
										
										
											2013-12-11 23:01:44 +01:00
										 |  |  | 	const void __user *inbuf; | 
					
						
							| 
									
										
										
										
											2005-07-07 17:57:10 -07:00
										 |  |  | 	void __user *outbuf; | 
					
						
							|  |  |  | 	size_t       inlen; | 
					
						
							|  |  |  | 	size_t       outlen; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | struct ib_pd { | 
					
						
							| 
									
										
										
										
											2005-07-07 17:57:10 -07:00
										 |  |  | 	struct ib_device       *device; | 
					
						
							|  |  |  | 	struct ib_uobject      *uobject; | 
					
						
							|  |  |  | 	atomic_t          	usecnt; /* count all resources */ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-05-23 17:52:46 -07:00
										 |  |  | struct ib_xrcd { | 
					
						
							|  |  |  | 	struct ib_device       *device; | 
					
						
							| 
									
										
										
										
											2011-05-26 23:06:44 -07:00
										 |  |  | 	atomic_t		usecnt; /* count all exposed resources */ | 
					
						
							| 
									
										
										
										
											2011-05-24 08:33:46 -07:00
										 |  |  | 	struct inode	       *inode; | 
					
						
							| 
									
										
										
										
											2011-05-26 23:06:44 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	struct mutex		tgt_qp_mutex; | 
					
						
							|  |  |  | 	struct list_head	tgt_qp_list; | 
					
						
							| 
									
										
										
										
											2011-05-23 17:52:46 -07:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 /*
										  * AH object: pointers to its device, its PD and its userspace object.
										  */
										 struct ib_ah {
										 	struct ib_device	*device;
										 	struct ib_pd		*pd;
										 	struct ib_uobject	*uobject;
										 };
					
						
							|  |  |  | 
 | 
					
						
							/* Callback type taking a CQ pointer and an opaque cq_context pointer. */
							typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context);
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct ib_cq { | 
					
						
							| 
									
										
										
										
											2005-07-07 17:57:10 -07:00
										 |  |  | 	struct ib_device       *device; | 
					
						
							|  |  |  | 	struct ib_uobject      *uobject; | 
					
						
							|  |  |  | 	ib_comp_handler   	comp_handler; | 
					
						
							|  |  |  | 	void                  (*event_handler)(struct ib_event *, void *); | 
					
						
							| 
									
										
										
										
											2008-07-14 23:48:44 -07:00
										 |  |  | 	void                   *cq_context; | 
					
						
							| 
									
										
										
										
											2005-07-07 17:57:10 -07:00
										 |  |  | 	int               	cqe; | 
					
						
							|  |  |  | 	atomic_t          	usecnt; /* count number of work queues */ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct ib_srq { | 
					
						
							| 
									
										
										
										
											2005-08-18 12:23:08 -07:00
										 |  |  | 	struct ib_device       *device; | 
					
						
							|  |  |  | 	struct ib_pd	       *pd; | 
					
						
							|  |  |  | 	struct ib_uobject      *uobject; | 
					
						
							|  |  |  | 	void		      (*event_handler)(struct ib_event *, void *); | 
					
						
							|  |  |  | 	void		       *srq_context; | 
					
						
							| 
									
										
										
										
											2011-05-23 16:31:36 -07:00
										 |  |  | 	enum ib_srq_type	srq_type; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	atomic_t		usecnt; | 
					
						
							| 
									
										
										
										
											2011-05-23 19:42:29 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	union { | 
					
						
							|  |  |  | 		struct { | 
					
						
							|  |  |  | 			struct ib_xrcd *xrcd; | 
					
						
							|  |  |  | 			struct ib_cq   *cq; | 
					
						
							|  |  |  | 			u32		srq_num; | 
					
						
							|  |  |  | 		} xrc; | 
					
						
							|  |  |  | 	} ext; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct ib_qp { | 
					
						
							|  |  |  | 	struct ib_device       *device; | 
					
						
							|  |  |  | 	struct ib_pd	       *pd; | 
					
						
							|  |  |  | 	struct ib_cq	       *send_cq; | 
					
						
							|  |  |  | 	struct ib_cq	       *recv_cq; | 
					
						
							|  |  |  | 	struct ib_srq	       *srq; | 
					
						
							| 
									
										
										
										
											2011-05-23 19:59:25 -07:00
										 |  |  | 	struct ib_xrcd	       *xrcd; /* XRC TGT QPs only */ | 
					
						
							| 
									
										
										
										
											2011-05-26 23:06:44 -07:00
										 |  |  | 	struct list_head	xrcd_list; | 
					
						
							| 
									
										
											  
											
												IB/core: Add receive flow steering support
The RDMA stack allows for applications to create IB_QPT_RAW_PACKET
QPs, which receive plain Ethernet packets, specifically packets that
don't carry any QPN to be matched by the receiving side.  Applications
using these QPs must be provided with a method to program some
steering rule with the HW so packets arriving at the local port can be
routed to them.
This patch adds ib_create_flow(), which allow providing a flow
specification for a QP.  When there's a match between the
specification and a received packet, the packet is forwarded to that
QP, in a the same way one uses ib_attach_multicast() for IB UD
multicast handling.
Flow specifications are provided as instances of struct ib_flow_spec_yyy,
which describe L2, L3 and L4 headers.  Currently specs for Ethernet, IPv4,
TCP and UDP are defined.  Flow specs are made of values and masks.
The input to ib_create_flow() is a struct ib_flow_attr, which contains
a few mandatory control elements and optional flow specs.
    struct ib_flow_attr {
            enum ib_flow_attr_type type;
            u16      size;
            u16      priority;
            u32      flags;
            u8       num_of_specs;
            u8       port;
            /* Following are the optional layers according to user request
             * struct ib_flow_spec_yyy
             * struct ib_flow_spec_zzz
             */
    };
As these specs are eventually coming from user space, they are defined and
used in a way which allows adding new spec types without kernel/user ABI
change, just with a little API enhancement which defines the newly added spec.
The flow spec structures are defined with TLV (Type-Length-Value)
entries, which allows calling ib_create_flow() with a list of variable
length of optional specs.
For the actual processing of ib_flow_attr the driver uses the number
of specs and the size mandatory fields along with the TLV nature of
the specs.
Steering rules processing order is according to the domain over which
the rule is set and the rule priority.  All rules set by user space
applicatations fall into the IB_FLOW_DOMAIN_USER domain, other domains
could be used by future IPoIB RFS and Ethetool flow-steering interface
implementation.  Lower numerical value for the priority field means
higher priority.
The returned value from ib_create_flow() is a struct ib_flow, which
contains a database pointer (handle) provided by the HW driver to be
used when calling ib_destroy_flow().
Applications that offload TCP/IP traffic can also be written over IB
UD QPs.  The ib_create_flow() / ib_destroy_flow() API is designed to
support UD QPs too.  A HW driver can set IB_DEVICE_MANAGED_FLOW_STEERING
to denote support for flow steering.
The ib_flow_attr enum type supports usage of flow steering for promiscuous
and sniffer purposes:
    IB_FLOW_ATTR_NORMAL - "regular" rule, steering according to rule specification
    IB_FLOW_ATTR_ALL_DEFAULT - default unicast and multicast rule, receive
        all Ethernet traffic which isn't steered to any QP
    IB_FLOW_ATTR_MC_DEFAULT - same as IB_FLOW_ATTR_ALL_DEFAULT but only for multicast
    IB_FLOW_ATTR_SNIFFER - sniffer rule, receive all port traffic
ALL_DEFAULT and MC_DEFAULT rules options are valid only for Ethernet link type.
Signed-off-by: Hadar Hen Zion <hadarh@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
											
										 
											2013-08-07 14:01:59 +03:00
										 |  |  | 	/* count times opened, mcast attaches, flow attaches */ | 
					
						
							|  |  |  | 	atomic_t		usecnt; | 
					
						
							| 
									
										
										
										
											2011-08-08 15:31:51 -07:00
										 |  |  | 	struct list_head	open_list; | 
					
						
							|  |  |  | 	struct ib_qp           *real_qp; | 
					
						
							| 
									
										
										
										
											2005-07-07 17:57:10 -07:00
										 |  |  | 	struct ib_uobject      *uobject; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	void                  (*event_handler)(struct ib_event *, void *); | 
					
						
							|  |  |  | 	void		       *qp_context; | 
					
						
							|  |  |  | 	u32			qp_num; | 
					
						
							|  |  |  | 	enum ib_qp_type		qp_type; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct ib_mr { | 
					
						
							| 
									
										
										
										
											2005-07-07 17:57:10 -07:00
										 |  |  | 	struct ib_device  *device; | 
					
						
							|  |  |  | 	struct ib_pd	  *pd; | 
					
						
							|  |  |  | 	struct ib_uobject *uobject; | 
					
						
							|  |  |  | 	u32		   lkey; | 
					
						
							|  |  |  | 	u32		   rkey; | 
					
						
							|  |  |  | 	atomic_t	   usecnt; /* count number of MWs */ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct ib_mw { | 
					
						
							|  |  |  | 	struct ib_device	*device; | 
					
						
							|  |  |  | 	struct ib_pd		*pd; | 
					
						
							| 
									
										
										
										
											2005-07-07 17:57:10 -07:00
										 |  |  | 	struct ib_uobject	*uobject; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	u32			rkey; | 
					
						
							| 
									
										
										
										
											2013-02-06 16:19:12 +00:00
										 |  |  | 	enum ib_mw_type         type; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct ib_fmr { | 
					
						
							|  |  |  | 	struct ib_device	*device; | 
					
						
							|  |  |  | 	struct ib_pd		*pd; | 
					
						
							|  |  |  | 	struct list_head	list; | 
					
						
							|  |  |  | 	u32			lkey; | 
					
						
							|  |  |  | 	u32			rkey; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												IB/core: Add receive flow steering support
The RDMA stack allows for applications to create IB_QPT_RAW_PACKET
QPs, which receive plain Ethernet packets, specifically packets that
don't carry any QPN to be matched by the receiving side.  Applications
using these QPs must be provided with a method to program some
steering rule with the HW so packets arriving at the local port can be
routed to them.
This patch adds ib_create_flow(), which allows providing a flow
specification for a QP.  When there's a match between the
specification and a received packet, the packet is forwarded to that
QP, in the same way one uses ib_attach_multicast() for IB UD
multicast handling.
Flow specifications are provided as instances of struct ib_flow_spec_yyy,
which describe L2, L3 and L4 headers.  Currently specs for Ethernet, IPv4,
TCP and UDP are defined.  Flow specs are made of values and masks.
The input to ib_create_flow() is a struct ib_flow_attr, which contains
a few mandatory control elements and optional flow specs.
    struct ib_flow_attr {
            enum ib_flow_attr_type type;
            u16      size;
            u16      priority;
            u32      flags;
            u8       num_of_specs;
            u8       port;
            /* Following are the optional layers according to user request
             * struct ib_flow_spec_yyy
             * struct ib_flow_spec_zzz
             */
    };
As these specs are eventually coming from user space, they are defined and
used in a way which allows adding new spec types without kernel/user ABI
change, just with a little API enhancement which defines the newly added spec.
The flow spec structures are defined with TLV (Type-Length-Value)
entries, which allows calling ib_create_flow() with a list of variable
length of optional specs.
For the actual processing of ib_flow_attr the driver uses the number
of specs and the size mandatory fields along with the TLV nature of
the specs.
Steering rules processing order is according to the domain over which
the rule is set and the rule priority.  All rules set by user space
applications fall into the IB_FLOW_DOMAIN_USER domain, other domains
could be used by future IPoIB RFS and Ethtool flow-steering interface
implementation.  Lower numerical value for the priority field means
higher priority.
The returned value from ib_create_flow() is a struct ib_flow, which
contains a database pointer (handle) provided by the HW driver to be
used when calling ib_destroy_flow().
Applications that offload TCP/IP traffic can also be written over IB
UD QPs.  The ib_create_flow() / ib_destroy_flow() API is designed to
support UD QPs too.  A HW driver can set IB_DEVICE_MANAGED_FLOW_STEERING
to denote support for flow steering.
The ib_flow_attr enum type supports usage of flow steering for promiscuous
and sniffer purposes:
    IB_FLOW_ATTR_NORMAL - "regular" rule, steering according to rule specification
    IB_FLOW_ATTR_ALL_DEFAULT - default unicast and multicast rule, receive
        all Ethernet traffic which isn't steered to any QP
    IB_FLOW_ATTR_MC_DEFAULT - same as IB_FLOW_ATTR_ALL_DEFAULT but only for multicast
    IB_FLOW_ATTR_SNIFFER - sniffer rule, receive all port traffic
ALL_DEFAULT and MC_DEFAULT rules options are valid only for Ethernet link type.
Signed-off-by: Hadar Hen Zion <hadarh@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
											
										 
											2013-08-07 14:01:59 +03:00
										 /* Supported steering options */
										 enum ib_flow_attr_type {
										 	/* steering according to rule specifications */
										 	IB_FLOW_ATTR_NORMAL		= 0x0,
										 	/* default unicast and multicast rule -
										 	 * receive all Eth traffic which isn't steered to any QP
										 	 */
										 	IB_FLOW_ATTR_ALL_DEFAULT	= 0x1,
										 	/* default multicast rule -
										 	 * receive all Eth multicast traffic which isn't steered to any QP
										 	 */
										 	IB_FLOW_ATTR_MC_DEFAULT		= 0x2,
										 	/* sniffer rule - receive all port traffic */
										 	IB_FLOW_ATTR_SNIFFER		= 0x3
										 };
					
						
							|  |  |  | 
 | 
					
						
							/*
							 * Supported steering header types.
							 * The values are grouped by layer in the high nibble:
							 * 0x2x for L2, 0x3x for L3, 0x4x for L4.
							 */
							enum ib_flow_spec_type {
								/* L2 headers */
								IB_FLOW_SPEC_ETH	= 0x20,
								IB_FLOW_SPEC_IB		= 0x22,
								/* L3 header */
								IB_FLOW_SPEC_IPV4	= 0x30,
								/* L4 headers */
								IB_FLOW_SPEC_TCP	= 0x40,
								IB_FLOW_SPEC_UDP	= 0x41
							};
					
						
							| 
									
										
										
										
											2013-11-07 15:25:13 +02:00
										 /* Mask for the high nibble of a flow spec type value (0x41 & 0xF0 == 0x40). */
										 #define IB_FLOW_SPEC_LAYER_MASK	0xF0
										 /* NOTE(review): presumably the number of header layers a rule may span; confirm. */
										 #define IB_FLOW_SPEC_SUPPORT_LAYERS 4
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												IB/core: Add receive flow steering support
The RDMA stack allows for applications to create IB_QPT_RAW_PACKET
QPs, which receive plain Ethernet packets, specifically packets that
don't carry any QPN to be matched by the receiving side.  Applications
using these QPs must be provided with a method to program some
steering rule with the HW so packets arriving at the local port can be
routed to them.
This patch adds ib_create_flow(), which allows providing a flow
specification for a QP.  When there's a match between the
specification and a received packet, the packet is forwarded to that
QP, in the same way one uses ib_attach_multicast() for IB UD
multicast handling.
Flow specifications are provided as instances of struct ib_flow_spec_yyy,
which describe L2, L3 and L4 headers.  Currently specs for Ethernet, IPv4,
TCP and UDP are defined.  Flow specs are made of values and masks.
The input to ib_create_flow() is a struct ib_flow_attr, which contains
a few mandatory control elements and optional flow specs.
    struct ib_flow_attr {
            enum ib_flow_attr_type type;
            u16      size;
            u16      priority;
            u32      flags;
            u8       num_of_specs;
            u8       port;
            /* Following are the optional layers according to user request
             * struct ib_flow_spec_yyy
             * struct ib_flow_spec_zzz
             */
    };
As these specs are eventually coming from user space, they are defined and
used in a way which allows adding new spec types without kernel/user ABI
change, just with a little API enhancement which defines the newly added spec.
The flow spec structures are defined with TLV (Type-Length-Value)
entries, which allows calling ib_create_flow() with a list of variable
length of optional specs.
For the actual processing of ib_flow_attr the driver uses the number
of specs and the size mandatory fields along with the TLV nature of
the specs.
Steering rules processing order is according to the domain over which
the rule is set and the rule priority.  All rules set by user space
applications fall into the IB_FLOW_DOMAIN_USER domain, other domains
could be used by future IPoIB RFS and Ethtool flow-steering interface
implementation.  Lower numerical value for the priority field means
higher priority.
The returned value from ib_create_flow() is a struct ib_flow, which
contains a database pointer (handle) provided by the HW driver to be
used when calling ib_destroy_flow().
Applications that offload TCP/IP traffic can also be written over IB
UD QPs.  The ib_create_flow() / ib_destroy_flow() API is designed to
support UD QPs too.  A HW driver can set IB_DEVICE_MANAGED_FLOW_STEERING
to denote support for flow steering.
The ib_flow_attr enum type supports usage of flow steering for promiscuous
and sniffer purposes:
    IB_FLOW_ATTR_NORMAL - "regular" rule, steering according to rule specification
    IB_FLOW_ATTR_ALL_DEFAULT - default unicast and multicast rule, receive
        all Ethernet traffic which isn't steered to any QP
    IB_FLOW_ATTR_MC_DEFAULT - same as IB_FLOW_ATTR_ALL_DEFAULT but only for multicast
    IB_FLOW_ATTR_SNIFFER - sniffer rule, receive all port traffic
ALL_DEFAULT and MC_DEFAULT rules options are valid only for Ethernet link type.
Signed-off-by: Hadar Hen Zion <hadarh@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
											
										 
											2013-08-07 14:01:59 +03:00
										 /* Flow steering rule priority is set according to its domain.
										  * Lower domain value means higher priority.
										  */
										 enum ib_flow_domain {
										 	IB_FLOW_DOMAIN_USER,
										 	IB_FLOW_DOMAIN_ETHTOOL,
										 	IB_FLOW_DOMAIN_RFS,
										 	IB_FLOW_DOMAIN_NIC,
										 	IB_FLOW_DOMAIN_NUM /* Must be last */
										 };
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct ib_flow_eth_filter { | 
					
						
							|  |  |  | 	u8	dst_mac[6]; | 
					
						
							|  |  |  | 	u8	src_mac[6]; | 
					
						
							|  |  |  | 	__be16	ether_type; | 
					
						
							|  |  |  | 	__be16	vlan_tag; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct ib_flow_spec_eth { | 
					
						
							|  |  |  | 	enum ib_flow_spec_type	  type; | 
					
						
							|  |  |  | 	u16			  size; | 
					
						
							|  |  |  | 	struct ib_flow_eth_filter val; | 
					
						
							|  |  |  | 	struct ib_flow_eth_filter mask; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-11-07 15:25:13 +02:00
										 |  |  | struct ib_flow_ib_filter { | 
					
						
							|  |  |  | 	__be16 dlid; | 
					
						
							|  |  |  | 	__u8   sl; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct ib_flow_spec_ib { | 
					
						
							|  |  |  | 	enum ib_flow_spec_type	 type; | 
					
						
							|  |  |  | 	u16			 size; | 
					
						
							|  |  |  | 	struct ib_flow_ib_filter val; | 
					
						
							|  |  |  | 	struct ib_flow_ib_filter mask; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												IB/core: Add receive flow steering support
The RDMA stack allows for applications to create IB_QPT_RAW_PACKET
QPs, which receive plain Ethernet packets, specifically packets that
don't carry any QPN to be matched by the receiving side.  Applications
using these QPs must be provided with a method to program some
steering rule with the HW so packets arriving at the local port can be
routed to them.
This patch adds ib_create_flow(), which allows providing a flow
specification for a QP.  When there's a match between the
specification and a received packet, the packet is forwarded to that
QP, in the same way one uses ib_attach_multicast() for IB UD
multicast handling.
Flow specifications are provided as instances of struct ib_flow_spec_yyy,
which describe L2, L3 and L4 headers.  Currently specs for Ethernet, IPv4,
TCP and UDP are defined.  Flow specs are made of values and masks.
The input to ib_create_flow() is a struct ib_flow_attr, which contains
a few mandatory control elements and optional flow specs.
    struct ib_flow_attr {
            enum ib_flow_attr_type type;
            u16      size;
            u16      priority;
            u32      flags;
            u8       num_of_specs;
            u8       port;
            /* Following are the optional layers according to user request
             * struct ib_flow_spec_yyy
             * struct ib_flow_spec_zzz
             */
    };
As these specs are eventually coming from user space, they are defined and
used in a way which allows adding new spec types without kernel/user ABI
change, just with a little API enhancement which defines the newly added spec.
The flow spec structures are defined with TLV (Type-Length-Value)
entries, which allows calling ib_create_flow() with a list of variable
length of optional specs.
For the actual processing of ib_flow_attr the driver uses the number
of specs and the size mandatory fields along with the TLV nature of
the specs.
Steering rules processing order is according to the domain over which
the rule is set and the rule priority.  All rules set by user space
applications fall into the IB_FLOW_DOMAIN_USER domain, other domains
could be used by future IPoIB RFS and Ethtool flow-steering interface
implementation.  Lower numerical value for the priority field means
higher priority.
The returned value from ib_create_flow() is a struct ib_flow, which
contains a database pointer (handle) provided by the HW driver to be
used when calling ib_destroy_flow().
Applications that offload TCP/IP traffic can also be written over IB
UD QPs.  The ib_create_flow() / ib_destroy_flow() API is designed to
support UD QPs too.  A HW driver can set IB_DEVICE_MANAGED_FLOW_STEERING
to denote support for flow steering.
The ib_flow_attr enum type supports usage of flow steering for promiscuous
and sniffer purposes:
    IB_FLOW_ATTR_NORMAL - "regular" rule, steering according to rule specification
    IB_FLOW_ATTR_ALL_DEFAULT - default unicast and multicast rule, receive
        all Ethernet traffic which isn't steered to any QP
    IB_FLOW_ATTR_MC_DEFAULT - same as IB_FLOW_ATTR_ALL_DEFAULT but only for multicast
    IB_FLOW_ATTR_SNIFFER - sniffer rule, receive all port traffic
ALL_DEFAULT and MC_DEFAULT rules options are valid only for Ethernet link type.
Signed-off-by: Hadar Hen Zion <hadarh@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
											
										 
											2013-08-07 14:01:59 +03:00
										 |  |  | /* IPv4 header fields; used as the val and mask members of struct ib_flow_spec_ipv4. */ struct ib_flow_ipv4_filter { | 
					
						
							|  |  |  | 	__be32	src_ip; /* source IPv4 address, declared big-endian (__be32) */ | 
					
						
							|  |  |  | 	__be32	dst_ip; /* destination IPv4 address, declared big-endian (__be32) */ | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Flow spec for the IPv4 header: a type/size header followed by a value and a mask of the same filter layout. */ struct ib_flow_spec_ipv4 { | 
					
						
							|  |  |  | 	enum ib_flow_spec_type	   type; /* identifies which spec variant this is */ | 
					
						
							|  |  |  | 	u16			   size; /* NOTE(review): presumably the byte size of this spec entry — confirm */ | 
					
						
							|  |  |  | 	struct ib_flow_ipv4_filter val; /* header field values to match */ | 
					
						
							|  |  |  | 	struct ib_flow_ipv4_filter mask; /* NOTE(review): presumably selects which bits of val are compared — confirm */ | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* TCP/UDP port fields; used as the val and mask members of struct ib_flow_spec_tcp_udp. */ struct ib_flow_tcp_udp_filter { | 
					
						
							|  |  |  | 	__be16	dst_port; /* destination port, declared big-endian (__be16) */ | 
					
						
							|  |  |  | 	__be16	src_port; /* source port, declared big-endian (__be16) */ | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Flow spec shared by TCP and UDP: a type/size header followed by a value and a mask of the same filter layout. */ struct ib_flow_spec_tcp_udp { | 
					
						
							|  |  |  | 	enum ib_flow_spec_type	      type; /* identifies which spec variant this is */ | 
					
						
							|  |  |  | 	u16			      size; /* NOTE(review): presumably the byte size of this spec entry — confirm */ | 
					
						
							|  |  |  | 	struct ib_flow_tcp_udp_filter val; /* port values to match */ | 
					
						
							|  |  |  | 	struct ib_flow_tcp_udp_filter mask; /* NOTE(review): presumably selects which bits of val are compared — confirm */ | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* One flow spec of any supported kind. The anonymous struct overlays the type/size pair that every ib_flow_spec_* member begins with, so the header can be read before picking a member. */ union ib_flow_spec { | 
					
						
							|  |  |  | 	struct { | 
					
						
							|  |  |  | 		enum ib_flow_spec_type	type; /* same leading field as in each member below */ | 
					
						
							|  |  |  | 		u16			size; /* same second field as in each member below */ | 
					
						
							|  |  |  | 	}; | 
					
						
							|  |  |  | 	struct ib_flow_spec_eth		eth; | 
					
						
							| 
									
										
										
										
											2013-11-07 15:25:13 +02:00
										 |  |  | 	struct ib_flow_spec_ib		ib; | 
					
						
							| 
									
										
											  
											
												IB/core: Add receive flow steering support
The RDMA stack allows for applications to create IB_QPT_RAW_PACKET
QPs, which receive plain Ethernet packets, specifically packets that
don't carry any QPN to be matched by the receiving side.  Applications
using these QPs must be provided with a method to program some
steering rule with the HW so packets arriving at the local port can be
routed to them.
This patch adds ib_create_flow(), which allow providing a flow
specification for a QP.  When there's a match between the
specification and a received packet, the packet is forwarded to that
QP, in a the same way one uses ib_attach_multicast() for IB UD
multicast handling.
Flow specifications are provided as instances of struct ib_flow_spec_yyy,
which describe L2, L3 and L4 headers.  Currently specs for Ethernet, IPv4,
TCP and UDP are defined.  Flow specs are made of values and masks.
The input to ib_create_flow() is a struct ib_flow_attr, which contains
a few mandatory control elements and optional flow specs.
    struct ib_flow_attr {
            enum ib_flow_attr_type type;
            u16      size;
            u16      priority;
            u32      flags;
            u8       num_of_specs;
            u8       port;
            /* Following are the optional layers according to user request
             * struct ib_flow_spec_yyy
             * struct ib_flow_spec_zzz
             */
    };
As these specs are eventually coming from user space, they are defined and
used in a way which allows adding new spec types without kernel/user ABI
change, just with a little API enhancement which defines the newly added spec.
The flow spec structures are defined with TLV (Type-Length-Value)
entries, which allows calling ib_create_flow() with a list of variable
length of optional specs.
For the actual processing of ib_flow_attr the driver uses the number
of specs and the size mandatory fields along with the TLV nature of
the specs.
Steering rules processing order is according to the domain over which
the rule is set and the rule priority.  All rules set by user space
applicatations fall into the IB_FLOW_DOMAIN_USER domain, other domains
could be used by future IPoIB RFS and Ethetool flow-steering interface
implementation.  Lower numerical value for the priority field means
higher priority.
The returned value from ib_create_flow() is a struct ib_flow, which
contains a database pointer (handle) provided by the HW driver to be
used when calling ib_destroy_flow().
Applications that offload TCP/IP traffic can also be written over IB
UD QPs.  The ib_create_flow() / ib_destroy_flow() API is designed to
support UD QPs too.  A HW driver can set IB_DEVICE_MANAGED_FLOW_STEERING
to denote support for flow steering.
The ib_flow_attr enum type supports usage of flow steering for promiscuous
and sniffer purposes:
    IB_FLOW_ATTR_NORMAL - "regular" rule, steering according to rule specification
    IB_FLOW_ATTR_ALL_DEFAULT - default unicast and multicast rule, receive
        all Ethernet traffic which isn't steered to any QP
    IB_FLOW_ATTR_MC_DEFAULT - same as IB_FLOW_ATTR_ALL_DEFAULT but only for multicast
    IB_FLOW_ATTR_SNIFFER - sniffer rule, receive all port traffic
ALL_DEFAULT and MC_DEFAULT rules options are valid only for Ethernet link type.
Signed-off-by: Hadar Hen Zion <hadarh@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
											
										 
											2013-08-07 14:01:59 +03:00
										 |  |  | 	struct ib_flow_spec_ipv4        ipv4; | 
					
						
							|  |  |  | 	struct ib_flow_spec_tcp_udp	tcp_udp; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Fixed header describing one flow steering rule; the optional flow specs follow it as noted at the end of the struct. */ struct ib_flow_attr { | 
					
						
							|  |  |  | 	enum ib_flow_attr_type type; /* kind of rule */ | 
					
						
							|  |  |  | 	u16	     size; /* NOTE(review): presumably the total byte size of this header plus the trailing specs — confirm */ | 
					
						
							|  |  |  | 	u16	     priority; /* NOTE(review): presumably a lower value means higher priority — confirm */ | 
					
						
							|  |  |  | 	u32	     flags; | 
					
						
							|  |  |  | 	u8	     num_of_specs; /* NOTE(review): presumably the count of flow specs that follow this header — confirm */ | 
					
						
							|  |  |  | 	u8	     port; /* NOTE(review): presumably the port the rule applies to — confirm */ | 
					
						
							|  |  |  | 	/* Following are the optional layers according to user request
 | 
					
						
							|  |  |  | 	 * struct ib_flow_spec_xxx | 
					
						
							|  |  |  | 	 * struct ib_flow_spec_yyy | 
					
						
							|  |  |  | 	 */ | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Flow handle object. NOTE(review): presumably what ib_create_flow() returns and ib_destroy_flow() consumes — confirm. */ struct ib_flow { | 
					
						
							|  |  |  | 	struct ib_qp		*qp; /* NOTE(review): presumably the QP that matching packets are steered to — confirm */ | 
					
						
							|  |  |  | 	struct ib_uobject	*uobject; /* NOTE(review): presumably the user-space object tracking this flow — confirm */ | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | /* Forward declarations: the process_mad callback in struct ib_device takes these types by pointer. */ struct ib_mad; | 
					
						
							|  |  |  | struct ib_grh; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Bit flags. NOTE(review): presumably passed as the process_mad_flags argument of process_mad() — confirm. */ enum ib_process_mad_flags { | 
					
						
							|  |  |  | 	IB_MAD_IGNORE_MKEY	= 1, | 
					
						
							|  |  |  | 	IB_MAD_IGNORE_BKEY	= 2, | 
					
						
							|  |  |  | 	IB_MAD_IGNORE_ALL	= IB_MAD_IGNORE_MKEY | IB_MAD_IGNORE_BKEY /* both bits set (value 3) */ | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Bit flags describing the outcome of MAD processing; FAILURE is 0, i.e. the SUCCESS bit is not set. NOTE(review): presumably the return value of process_mad() — confirm. */ enum ib_mad_result { | 
					
						
							|  |  |  | 	IB_MAD_RESULT_FAILURE  = 0,      /* (!SUCCESS is the important flag) */ | 
					
						
							|  |  |  | 	IB_MAD_RESULT_SUCCESS  = 1 << 0, /* MAD was successfully processed   */ | 
					
						
							|  |  |  | 	IB_MAD_RESULT_REPLY    = 1 << 1, /* Reply packet needs to be sent    */ | 
					
						
							|  |  |  | 	IB_MAD_RESULT_CONSUMED = 1 << 2  /* Packet consumed: stop processing */ | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define IB_DEVICE_NAME_MAX 64 /* size in bytes of the name[] array in struct ib_device */
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Cache state embedded in struct ib_device as its 'cache' member. */ struct ib_cache { | 
					
						
							|  |  |  | 	rwlock_t                lock; /* NOTE(review): presumably guards the cache pointers below — confirm */ | 
					
						
							|  |  |  | 	struct ib_event_handler event_handler; | 
					
						
							|  |  |  | 	struct ib_pkey_cache  **pkey_cache; /* NOTE(review): presumably one entry per port — confirm */ | 
					
						
							|  |  |  | 	struct ib_gid_cache   **gid_cache; /* NOTE(review): presumably one entry per port — confirm */ | 
					
						
							| 
									
										
										
										
											2006-06-17 20:37:34 -07:00
										 |  |  | 	u8                     *lmc_cache; /* NOTE(review): presumably one LMC value per port — confirm */ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-12-12 14:27:41 -08:00
										 |  |  | /* Table of DMA mapping callbacks. Every callback takes the struct ib_device as its first argument, and DMA addresses are carried as u64. NOTE(review): presumably lets a device supply its own DMA mapping routines — confirm against the users of this struct. */ struct ib_dma_mapping_ops { | 
					
						
							|  |  |  | 	int		(*mapping_error)(struct ib_device *dev, | 
					
						
							|  |  |  | 					 u64 dma_addr); | 
					
						
							|  |  |  | 	u64		(*map_single)(struct ib_device *dev, | 
					
						
							|  |  |  | 				      void *ptr, size_t size, | 
					
						
							|  |  |  | 				      enum dma_data_direction direction); | 
					
						
							|  |  |  | 	void		(*unmap_single)(struct ib_device *dev, | 
					
						
							|  |  |  | 					u64 addr, size_t size, | 
					
						
							|  |  |  | 					enum dma_data_direction direction); | 
					
						
							|  |  |  | 	u64		(*map_page)(struct ib_device *dev, | 
					
						
							|  |  |  | 				    struct page *page, unsigned long offset, | 
					
						
							|  |  |  | 				    size_t size, | 
					
						
							|  |  |  | 				    enum dma_data_direction direction); | 
					
						
							|  |  |  | 	void		(*unmap_page)(struct ib_device *dev, | 
					
						
							|  |  |  | 				      u64 addr, size_t size, | 
					
						
							|  |  |  | 				      enum dma_data_direction direction); | 
					
						
							|  |  |  | 	int		(*map_sg)(struct ib_device *dev, | 
					
						
							|  |  |  | 				  struct scatterlist *sg, int nents, | 
					
						
							|  |  |  | 				  enum dma_data_direction direction); | 
					
						
							|  |  |  | 	void		(*unmap_sg)(struct ib_device *dev, | 
					
						
							|  |  |  | 				    struct scatterlist *sg, int nents, | 
					
						
							|  |  |  | 				    enum dma_data_direction direction); | 
					
						
							|  |  |  | 	u64		(*dma_address)(struct ib_device *dev, | 
					
						
							|  |  |  | 				       struct scatterlist *sg); | 
					
						
							|  |  |  | 	unsigned int	(*dma_len)(struct ib_device *dev, | 
					
						
							|  |  |  | 				   struct scatterlist *sg); | 
					
						
							|  |  |  | 	void		(*sync_single_for_cpu)(struct ib_device *dev, | 
					
						
							|  |  |  | 					       u64 dma_handle, | 
					
						
							|  |  |  | 					       size_t size, | 
					
						
							| 
									
										
										
										
											2008-07-14 23:48:44 -07:00
										 |  |  | 					       enum dma_data_direction dir); | 
					
						
							| 
									
										
										
										
											2006-12-12 14:27:41 -08:00
										 |  |  | 	void		(*sync_single_for_device)(struct ib_device *dev, | 
					
						
							|  |  |  | 						  u64 dma_handle, | 
					
						
							|  |  |  | 						  size_t size, | 
					
						
							|  |  |  | 						  enum dma_data_direction dir); | 
					
						
							|  |  |  | 	void		*(*alloc_coherent)(struct ib_device *dev, | 
					
						
							|  |  |  | 					   size_t size, | 
					
						
							|  |  |  | 					   u64 *dma_handle, | 
					
						
							|  |  |  | 					   gfp_t flag); | 
					
						
							|  |  |  | 	void		(*free_coherent)(struct ib_device *dev, | 
					
						
							|  |  |  | 					 size_t size, void *cpu_addr, | 
					
						
							|  |  |  | 					 u64 dma_handle); | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-08-03 16:02:42 -05:00
										 |  |  | /* Forward declaration: struct ib_device only holds a pointer to this type (its iwcm member). */ struct iw_cm_verbs; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | struct ib_device { | 
					
						
							|  |  |  | 	struct device                *dma_device; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	char                          name[IB_DEVICE_NAME_MAX]; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	struct list_head              event_handler_list; | 
					
						
							|  |  |  | 	spinlock_t                    event_handler_lock; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-02-02 19:09:16 +00:00
										 |  |  | 	spinlock_t                    client_data_lock; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	struct list_head              core_list; | 
					
						
							|  |  |  | 	struct list_head              client_data_list; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	struct ib_cache               cache; | 
					
						
							| 
									
										
										
										
											2007-05-14 07:26:51 +03:00
										 |  |  | 	int                          *pkey_tbl_len; | 
					
						
							|  |  |  | 	int                          *gid_tbl_len; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-05-03 13:48:47 +03:00
										 |  |  | 	int			      num_comp_vectors; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-08-03 16:02:42 -05:00
										 |  |  | 	struct iw_cm_verbs	     *iwcm; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-07-14 23:48:48 -07:00
										 |  |  | 	int		           (*get_protocol_stats)(struct ib_device *device, | 
					
						
							|  |  |  | 							 union rdma_protocol_stats *stats); | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	int		           (*query_device)(struct ib_device *device, | 
					
						
							|  |  |  | 						   struct ib_device_attr *device_attr); | 
					
						
							|  |  |  | 	int		           (*query_port)(struct ib_device *device, | 
					
						
							|  |  |  | 						 u8 port_num, | 
					
						
							|  |  |  | 						 struct ib_port_attr *port_attr); | 
					
						
							| 
									
										
										
										
											2010-09-27 17:51:10 -07:00
										 |  |  | 	enum rdma_link_layer	   (*get_link_layer)(struct ib_device *device, | 
					
						
							|  |  |  | 						     u8 port_num); | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	int		           (*query_gid)(struct ib_device *device, | 
					
						
							|  |  |  | 						u8 port_num, int index, | 
					
						
							|  |  |  | 						union ib_gid *gid); | 
					
						
							|  |  |  | 	int		           (*query_pkey)(struct ib_device *device, | 
					
						
							|  |  |  | 						 u8 port_num, u16 index, u16 *pkey); | 
					
						
							|  |  |  | 	int		           (*modify_device)(struct ib_device *device, | 
					
						
							|  |  |  | 						    int device_modify_mask, | 
					
						
							|  |  |  | 						    struct ib_device_modify *device_modify); | 
					
						
							|  |  |  | 	int		           (*modify_port)(struct ib_device *device, | 
					
						
							|  |  |  | 						  u8 port_num, int port_modify_mask, | 
					
						
							|  |  |  | 						  struct ib_port_modify *port_modify); | 
					
						
							| 
									
										
										
										
											2005-07-07 17:57:10 -07:00
										 |  |  | 	struct ib_ucontext *       (*alloc_ucontext)(struct ib_device *device, | 
					
						
							|  |  |  | 						     struct ib_udata *udata); | 
					
						
							|  |  |  | 	int                        (*dealloc_ucontext)(struct ib_ucontext *context); | 
					
						
							|  |  |  | 	int                        (*mmap)(struct ib_ucontext *context, | 
					
						
							|  |  |  | 					   struct vm_area_struct *vma); | 
					
						
							|  |  |  | 	struct ib_pd *             (*alloc_pd)(struct ib_device *device, | 
					
						
							|  |  |  | 					       struct ib_ucontext *context, | 
					
						
							|  |  |  | 					       struct ib_udata *udata); | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	int                        (*dealloc_pd)(struct ib_pd *pd); | 
					
						
							|  |  |  | 	struct ib_ah *             (*create_ah)(struct ib_pd *pd, | 
					
						
							|  |  |  | 						struct ib_ah_attr *ah_attr); | 
					
						
							|  |  |  | 	int                        (*modify_ah)(struct ib_ah *ah, | 
					
						
							|  |  |  | 						struct ib_ah_attr *ah_attr); | 
					
						
							|  |  |  | 	int                        (*query_ah)(struct ib_ah *ah, | 
					
						
							|  |  |  | 					       struct ib_ah_attr *ah_attr); | 
					
						
							|  |  |  | 	int                        (*destroy_ah)(struct ib_ah *ah); | 
					
						
							| 
									
										
										
										
											2005-08-18 12:23:08 -07:00
										 |  |  | 	struct ib_srq *            (*create_srq)(struct ib_pd *pd, | 
					
						
							|  |  |  | 						 struct ib_srq_init_attr *srq_init_attr, | 
					
						
							|  |  |  | 						 struct ib_udata *udata); | 
					
						
							|  |  |  | 	int                        (*modify_srq)(struct ib_srq *srq, | 
					
						
							|  |  |  | 						 struct ib_srq_attr *srq_attr, | 
					
						
							| 
									
										
										
										
											2006-08-11 14:58:09 -07:00
										 |  |  | 						 enum ib_srq_attr_mask srq_attr_mask, | 
					
						
							|  |  |  | 						 struct ib_udata *udata); | 
					
						
							| 
									
										
										
										
											2005-08-18 12:23:08 -07:00
										 |  |  | 	int                        (*query_srq)(struct ib_srq *srq, | 
					
						
							|  |  |  | 						struct ib_srq_attr *srq_attr); | 
					
						
							|  |  |  | 	int                        (*destroy_srq)(struct ib_srq *srq); | 
					
						
							|  |  |  | 	int                        (*post_srq_recv)(struct ib_srq *srq, | 
					
						
							|  |  |  | 						    struct ib_recv_wr *recv_wr, | 
					
						
							|  |  |  | 						    struct ib_recv_wr **bad_recv_wr); | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	struct ib_qp *             (*create_qp)(struct ib_pd *pd, | 
					
						
							| 
									
										
										
										
											2005-07-07 17:57:10 -07:00
										 |  |  | 						struct ib_qp_init_attr *qp_init_attr, | 
					
						
							|  |  |  | 						struct ib_udata *udata); | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	int                        (*modify_qp)(struct ib_qp *qp, | 
					
						
							|  |  |  | 						struct ib_qp_attr *qp_attr, | 
					
						
							| 
									
										
										
										
											2006-08-11 14:58:09 -07:00
										 |  |  | 						int qp_attr_mask, | 
					
						
							|  |  |  | 						struct ib_udata *udata); | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	int                        (*query_qp)(struct ib_qp *qp, | 
					
						
							|  |  |  | 					       struct ib_qp_attr *qp_attr, | 
					
						
							|  |  |  | 					       int qp_attr_mask, | 
					
						
							|  |  |  | 					       struct ib_qp_init_attr *qp_init_attr); | 
					
						
							|  |  |  | 	int                        (*destroy_qp)(struct ib_qp *qp); | 
					
						
							|  |  |  | 	int                        (*post_send)(struct ib_qp *qp, | 
					
						
							|  |  |  | 						struct ib_send_wr *send_wr, | 
					
						
							|  |  |  | 						struct ib_send_wr **bad_send_wr); | 
					
						
							|  |  |  | 	int                        (*post_recv)(struct ib_qp *qp, | 
					
						
							|  |  |  | 						struct ib_recv_wr *recv_wr, | 
					
						
							|  |  |  | 						struct ib_recv_wr **bad_recv_wr); | 
					
						
							| 
									
										
										
										
											2005-07-07 17:57:10 -07:00
										 |  |  | 	struct ib_cq *             (*create_cq)(struct ib_device *device, int cqe, | 
					
						
							| 
									
										
										
										
											2007-05-03 13:48:47 +03:00
										 |  |  | 						int comp_vector, | 
					
						
							| 
									
										
										
										
											2005-07-07 17:57:10 -07:00
										 |  |  | 						struct ib_ucontext *context, | 
					
						
							|  |  |  | 						struct ib_udata *udata); | 
					
						
							| 
									
										
										
										
											2008-04-16 21:09:33 -07:00
										 |  |  | 	int                        (*modify_cq)(struct ib_cq *cq, u16 cq_count, | 
					
						
							|  |  |  | 						u16 cq_period); | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	int                        (*destroy_cq)(struct ib_cq *cq); | 
					
						
							| 
									
										
										
										
											2006-01-30 14:29:21 -08:00
										 |  |  | 	int                        (*resize_cq)(struct ib_cq *cq, int cqe, | 
					
						
							|  |  |  | 						struct ib_udata *udata); | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	int                        (*poll_cq)(struct ib_cq *cq, int num_entries, | 
					
						
							|  |  |  | 					      struct ib_wc *wc); | 
					
						
							|  |  |  | 	int                        (*peek_cq)(struct ib_cq *cq, int wc_cnt); | 
					
						
							|  |  |  | 	int                        (*req_notify_cq)(struct ib_cq *cq, | 
					
						
							| 
									
										
											  
											
												IB: Return "maybe missed event" hint from ib_req_notify_cq()
The semantics defined by the InfiniBand specification say that
completion events are only generated when a completion is added to a
completion queue (CQ) after completion notification is requested.  In
other words, this means that the following race is possible:
	while (CQ is not empty)
		ib_poll_cq(CQ);
	// new completion is added after while loop is exited
	ib_req_notify_cq(CQ);
	// no event is generated for the existing completion
To close this race, the IB spec recommends doing another poll of the
CQ after requesting notification.
However, it is not always possible to arrange code this way (for
example, we have found that NAPI for IPoIB cannot poll after
requesting notification).  Also, some hardware (eg Mellanox HCAs)
actually will generate an event for completions added before the call
to ib_req_notify_cq() -- which is allowed by the spec, since there's
no way for any upper-layer consumer to know exactly when a completion
was really added -- so the extra poll of the CQ is just a waste.
Motivated by this, we add a new flag "IB_CQ_REPORT_MISSED_EVENTS" for
ib_req_notify_cq() so that it can return a hint about whether a
completion may have been added before the request for notification.
The return value of ib_req_notify_cq() is extended so:
	 < 0	means an error occurred while requesting notification
	== 0	means notification was requested successfully, and if
		IB_CQ_REPORT_MISSED_EVENTS was passed in, then no
		events were missed and it is safe to wait for another
		event.
	 > 0	is only returned if IB_CQ_REPORT_MISSED_EVENTS was
		passed in.  It means that the consumer must poll the
		CQ again to make sure it is empty to avoid the race
		described above.
We add a flag to enable this behavior rather than turning it on
unconditionally, because checking for missed events may incur
significant overhead for some low-level drivers, and consumers that
don't care about the results of this test shouldn't be forced to pay
for the test.
Signed-off-by: Roland Dreier <rolandd@cisco.com>
											
										 
											2007-05-06 21:02:48 -07:00
										 |  |  | 						    enum ib_cq_notify_flags flags); | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	int                        (*req_ncomp_notif)(struct ib_cq *cq, | 
					
						
							|  |  |  | 						      int wc_cnt); | 
					
						
							|  |  |  | 	struct ib_mr *             (*get_dma_mr)(struct ib_pd *pd, | 
					
						
							|  |  |  | 						 int mr_access_flags); | 
					
						
							|  |  |  | 	struct ib_mr *             (*reg_phys_mr)(struct ib_pd *pd, | 
					
						
							|  |  |  | 						  struct ib_phys_buf *phys_buf_array, | 
					
						
							|  |  |  | 						  int num_phys_buf, | 
					
						
							|  |  |  | 						  int mr_access_flags, | 
					
						
							|  |  |  | 						  u64 *iova_start); | 
					
						
							| 
									
										
										
										
											2005-07-07 17:57:10 -07:00
										 |  |  | 	struct ib_mr *             (*reg_user_mr)(struct ib_pd *pd, | 
					
						
							| 
									
										
										
										
											2007-03-04 16:15:11 -08:00
										 |  |  | 						  u64 start, u64 length, | 
					
						
							|  |  |  | 						  u64 virt_addr, | 
					
						
							| 
									
										
										
										
											2005-07-07 17:57:10 -07:00
										 |  |  | 						  int mr_access_flags, | 
					
						
							|  |  |  | 						  struct ib_udata *udata); | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	int                        (*query_mr)(struct ib_mr *mr, | 
					
						
							|  |  |  | 					       struct ib_mr_attr *mr_attr); | 
					
						
							|  |  |  | 	int                        (*dereg_mr)(struct ib_mr *mr); | 
					
						
							| 
									
										
										
										
											2008-07-14 23:48:45 -07:00
										 |  |  | 	struct ib_mr *		   (*alloc_fast_reg_mr)(struct ib_pd *pd, | 
					
						
							|  |  |  | 					       int max_page_list_len); | 
					
						
							|  |  |  | 	struct ib_fast_reg_page_list * (*alloc_fast_reg_page_list)(struct ib_device *device, | 
					
						
							|  |  |  | 								   int page_list_len); | 
					
						
							|  |  |  | 	void			   (*free_fast_reg_page_list)(struct ib_fast_reg_page_list *page_list); | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	int                        (*rereg_phys_mr)(struct ib_mr *mr, | 
					
						
							|  |  |  | 						    int mr_rereg_mask, | 
					
						
							|  |  |  | 						    struct ib_pd *pd, | 
					
						
							|  |  |  | 						    struct ib_phys_buf *phys_buf_array, | 
					
						
							|  |  |  | 						    int num_phys_buf, | 
					
						
							|  |  |  | 						    int mr_access_flags, | 
					
						
							|  |  |  | 						    u64 *iova_start); | 
					
						
							| 
									
										
										
										
											2013-02-06 16:19:12 +00:00
										 |  |  | 	struct ib_mw *             (*alloc_mw)(struct ib_pd *pd, | 
					
						
							|  |  |  | 					       enum ib_mw_type type); | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	int                        (*bind_mw)(struct ib_qp *qp, | 
					
						
							|  |  |  | 					      struct ib_mw *mw, | 
					
						
							|  |  |  | 					      struct ib_mw_bind *mw_bind); | 
					
						
							|  |  |  | 	int                        (*dealloc_mw)(struct ib_mw *mw); | 
					
						
							|  |  |  | 	struct ib_fmr *	           (*alloc_fmr)(struct ib_pd *pd, | 
					
						
							|  |  |  | 						int mr_access_flags, | 
					
						
							|  |  |  | 						struct ib_fmr_attr *fmr_attr); | 
					
						
							|  |  |  | 	int		           (*map_phys_fmr)(struct ib_fmr *fmr, | 
					
						
							|  |  |  | 						   u64 *page_list, int list_len, | 
					
						
							|  |  |  | 						   u64 iova); | 
					
						
							|  |  |  | 	int		           (*unmap_fmr)(struct list_head *fmr_list); | 
					
						
							|  |  |  | 	int		           (*dealloc_fmr)(struct ib_fmr *fmr); | 
					
						
							|  |  |  | 	int                        (*attach_mcast)(struct ib_qp *qp, | 
					
						
							|  |  |  | 						   union ib_gid *gid, | 
					
						
							|  |  |  | 						   u16 lid); | 
					
						
							|  |  |  | 	int                        (*detach_mcast)(struct ib_qp *qp, | 
					
						
							|  |  |  | 						   union ib_gid *gid, | 
					
						
							|  |  |  | 						   u16 lid); | 
					
						
							|  |  |  | 	int                        (*process_mad)(struct ib_device *device, | 
					
						
							|  |  |  | 						  int process_mad_flags, | 
					
						
							|  |  |  | 						  u8 port_num, | 
					
						
							|  |  |  | 						  struct ib_wc *in_wc, | 
					
						
							|  |  |  | 						  struct ib_grh *in_grh, | 
					
						
							|  |  |  | 						  struct ib_mad *in_mad, | 
					
						
							|  |  |  | 						  struct ib_mad *out_mad); | 
					
						
							| 
									
										
										
										
											2011-05-23 17:52:46 -07:00
										 |  |  | 	struct ib_xrcd *	   (*alloc_xrcd)(struct ib_device *device, | 
					
						
							|  |  |  | 						 struct ib_ucontext *ucontext, | 
					
						
							|  |  |  | 						 struct ib_udata *udata); | 
					
						
							|  |  |  | 	int			   (*dealloc_xrcd)(struct ib_xrcd *xrcd); | 
					
						
							| 
									
										
											  
											
												IB/core: Add receive flow steering support
The RDMA stack allows for applications to create IB_QPT_RAW_PACKET
QPs, which receive plain Ethernet packets, specifically packets that
don't carry any QPN to be matched by the receiving side.  Applications
using these QPs must be provided with a method to program some
steering rule with the HW so packets arriving at the local port can be
routed to them.
This patch adds ib_create_flow(), which allow providing a flow
specification for a QP.  When there's a match between the
specification and a received packet, the packet is forwarded to that
QP, in the same way one uses ib_attach_multicast() for IB UD
multicast handling.
Flow specifications are provided as instances of struct ib_flow_spec_yyy,
which describe L2, L3 and L4 headers.  Currently specs for Ethernet, IPv4,
TCP and UDP are defined.  Flow specs are made of values and masks.
The input to ib_create_flow() is a struct ib_flow_attr, which contains
a few mandatory control elements and optional flow specs.
    struct ib_flow_attr {
            enum ib_flow_attr_type type;
            u16      size;
            u16      priority;
            u32      flags;
            u8       num_of_specs;
            u8       port;
            /* Following are the optional layers according to user request
             * struct ib_flow_spec_yyy
             * struct ib_flow_spec_zzz
             */
    };
As these specs are eventually coming from user space, they are defined and
used in a way which allows adding new spec types without kernel/user ABI
change, just with a little API enhancement which defines the newly added spec.
The flow spec structures are defined with TLV (Type-Length-Value)
entries, which allows calling ib_create_flow() with a list of variable
length of optional specs.
For the actual processing of ib_flow_attr the driver uses the number
of specs and the size mandatory fields along with the TLV nature of
the specs.
Steering rules processing order is according to the domain over which
the rule is set and the rule priority.  All rules set by user space
applications fall into the IB_FLOW_DOMAIN_USER domain, other domains
could be used by future IPoIB RFS and Ethtool flow-steering interface
implementation.  Lower numerical value for the priority field means
higher priority.
The returned value from ib_create_flow() is a struct ib_flow, which
contains a database pointer (handle) provided by the HW driver to be
used when calling ib_destroy_flow().
Applications that offload TCP/IP traffic can also be written over IB
UD QPs.  The ib_create_flow() / ib_destroy_flow() API is designed to
support UD QPs too.  A HW driver can set IB_DEVICE_MANAGED_FLOW_STEERING
to denote support for flow steering.
The ib_flow_attr enum type supports usage of flow steering for promiscuous
and sniffer purposes:
    IB_FLOW_ATTR_NORMAL - "regular" rule, steering according to rule specification
    IB_FLOW_ATTR_ALL_DEFAULT - default unicast and multicast rule, receive
        all Ethernet traffic which isn't steered to any QP
    IB_FLOW_ATTR_MC_DEFAULT - same as IB_FLOW_ATTR_ALL_DEFAULT but only for multicast
    IB_FLOW_ATTR_SNIFFER - sniffer rule, receive all port traffic
ALL_DEFAULT and MC_DEFAULT rules options are valid only for Ethernet link type.
Signed-off-by: Hadar Hen Zion <hadarh@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
											
										 
											2013-08-07 14:01:59 +03:00
										 |  |  | 	struct ib_flow *	   (*create_flow)(struct ib_qp *qp, | 
					
						
							|  |  |  | 						  struct ib_flow_attr | 
					
						
							|  |  |  | 						  *flow_attr, | 
					
						
							|  |  |  | 						  int domain); | 
					
						
							|  |  |  | 	int			   (*destroy_flow)(struct ib_flow *flow_id); | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-12-12 14:27:41 -08:00
										 |  |  | 	struct ib_dma_mapping_ops   *dma_ops; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-07-07 17:57:10 -07:00
										 |  |  | 	struct module               *owner; | 
					
						
							| 
									
										
										
										
											2008-02-22 00:13:36 +01:00
										 |  |  | 	struct device                dev; | 
					
						
							| 
									
										
										
										
											2007-12-17 15:54:39 -04:00
										 |  |  | 	struct kobject               *ports_parent; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	struct list_head             port_list; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	enum { | 
					
						
							|  |  |  | 		IB_DEV_UNINITIALIZED, | 
					
						
							|  |  |  | 		IB_DEV_REGISTERED, | 
					
						
							|  |  |  | 		IB_DEV_UNREGISTERED | 
					
						
							|  |  |  | 	}                            reg_state; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-09-29 14:17:48 -07:00
										 |  |  | 	int			     uverbs_abi_ver; | 
					
						
							| 
									
										
										
										
											2010-02-02 19:09:16 +00:00
										 |  |  | 	u64			     uverbs_cmd_mask; | 
					
						
							| 
									
										
											  
											
												IB/core: extended command: an improved infrastructure for uverbs commands
Commit 400dbc96583f ("IB/core: Infrastructure for extensible uverbs
commands") added an infrastructure for extensible uverbs commands
while later commit 436f2ad05a0b ("IB/core: Export ib_create/destroy_flow
through uverbs") exported ib_create_flow()/ib_destroy_flow() functions
using this new infrastructure.
According to the commit 400dbc96583f, the purpose of this
infrastructure is to support passing around provider (eg. hardware)
specific buffers when userspace issue commands to the kernel, so that
it would be possible to extend uverbs (eg. core) buffers independently
from the provider buffers.
But the new kernel command function prototypes were not modified to
take advantage of this extension. This issue was exposed by Roland
Dreier in a previous review[1].
So the following patch is an attempt to a revised extensible command
infrastructure.
This improved extensible command infrastructure distinguishes between
core (eg. legacy)'s command/response buffers from provider
(eg. hardware)'s command/response buffers: each extended command
implementing function is given a struct ib_udata to hold core
(eg. uverbs) input and output buffers, and another struct ib_udata to
hold the hw (eg. provider) input and output buffers.
Having those buffers identified separately make it easier to increase
one buffer to support extension without having to add some code to
guess the exact size of each command/response parts: This should make
the extended functions more reliable.
Additionally, instead of relying on command identifier being greater
than IB_USER_VERBS_CMD_THRESHOLD, the proposed infrastructure rely on
unused bits in command field: on the 32 bits provided by command
field, only 6 bits are really needed to encode the identifier of
commands currently supported by the kernel. (Even using only 6 bits
leaves room for about 23 new commands).
So this patch makes use of some high order bits in command field to
store flags, leaving enough room for more command identifiers than one
will ever need (eg. 256).
The new flags are used to specify if the command should be processed
as an extended one or a legacy one. While designing the new command
format, care was taken to make usage of flags itself extensible.
Using high order bits of the commands field ensure that newer
libibverbs on older kernel will properly fail when trying to call
extended commands. On the other hand, older libibverbs on newer kernel
will never be able to issue calls to extended commands.
The extended command header includes the optional response pointer so
that output buffer length and output buffer pointer are located
together in the command, allowing proper parameters checking. This
should make implementing functions easier and safer.
Additionally the extended header ensure 64bits alignment, while making
all sizes multiple of 8 bytes, extending the maximum buffer size:
                             legacy      extended
   Maximum command buffer:  256KBytes   1024KBytes (512KBytes + 512KBytes)
  Maximum response buffer:  256KBytes   1024KBytes (512KBytes + 512KBytes)
For the purpose of doing proper buffer size accounting, the header
sizes are no longer taken into account in "in_words".
One of the odds of the current extensible infrastructure, reading
twice the "legacy" command header, is fixed by removing the "legacy"
command header from the extended command header: they are processed as
two different parts of the command: memory is read once and
information are not duplicated: it's making clear that's an extended
command scheme and not a different command scheme.
The proposed scheme will format input (command) and output (response)
buffers this way:
- command:
  legacy header +
  extended header +
  command data (core + hw):
    +----------------------------------------+
    | flags     |   00      00    |  command |
    |        in_words    |   out_words       |
    +----------------------------------------+
    |                 response               |
    |                 response               |
    | provider_in_words | provider_out_words |
    |                 padding                |
    +----------------------------------------+
    |                                        |
    .              <uverbs input>            .
    .              (in_words * 8)            .
    |                                        |
    +----------------------------------------+
    |                                        |
    .             <provider input>           .
    .          (provider_in_words * 8)       .
    |                                        |
    +----------------------------------------+
- response, if present:
    +----------------------------------------+
    |                                        |
    .          <uverbs output space>         .
    .             (out_words * 8)            .
    |                                        |
    +----------------------------------------+
    |                                        |
    .         <provider output space>        .
    .         (provider_out_words * 8)       .
    |                                        |
    +----------------------------------------+
The overall design is to ensure that the extensible infrastructure is
itself extensible while being more reliable with more input and bound
checking.
Note:
The unused field in the extended header would be perfect candidate to
hold the command "comp_mask" (eg. bit field used to handle
compatibility).  This was suggested by Roland Dreier in a previous
review[2].  But "comp_mask" field is likely to be present in the uverb
input and/or provider input, likewise for the response, as noted by
Matan Barak[3], so it doesn't make sense to put "comp_mask" in the
header.
[1]:
http://marc.info/?i=CAL1RGDWxmM17W2o_era24A-TTDeKyoL6u3NRu_=t_dhV_ZA9MA@mail.gmail.com
[2]:
http://marc.info/?i=CAL1RGDXJtrc849M6_XNZT5xO1+ybKtLWGq6yg6LhoSsKpsmkYA@mail.gmail.com
[3]:
http://marc.info/?i=525C1149.6000701@mellanox.com
Signed-off-by: Yann Droneaud <ydroneaud@opteya.com>
Link: http://marc.info/?i=cover.1383773832.git.ydroneaud@opteya.com
[ Convert "ret ? ret : 0" to the equivalent "ret".  - Roland ]
Signed-off-by: Roland Dreier <roland@purestorage.com>
											
										 
											2013-11-06 23:21:49 +01:00
										 |  |  | 	u64			     uverbs_ex_cmd_mask; | 
					
						
							| 
									
										
										
										
											2005-09-29 14:17:48 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-02-02 09:47:14 -08:00
										 |  |  | 	char			     node_desc[64]; | 
					
						
							| 
									
										
										
										
											2006-01-10 07:39:34 -08:00
										 |  |  | 	__be64			     node_guid; | 
					
						
							| 
									
										
										
										
											2008-07-14 23:48:53 -07:00
										 |  |  | 	u32			     local_dma_lkey; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	u8                           node_type; | 
					
						
							|  |  |  | 	u8                           phys_port_cnt; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct ib_client { | 
					
						
							|  |  |  | 	char  *name; | 
					
						
							|  |  |  | 	void (*add)   (struct ib_device *); | 
					
						
							|  |  |  | 	void (*remove)(struct ib_device *); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	struct list_head list; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct ib_device *ib_alloc_device(size_t size); | 
					
						
							|  |  |  | void ib_dealloc_device(struct ib_device *device); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-06 17:03:25 -07:00
										 |  |  | int ib_register_device(struct ib_device *device, | 
					
						
							|  |  |  | 		       int (*port_callback)(struct ib_device *, | 
					
						
							|  |  |  | 					    u8, struct kobject *)); | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | void ib_unregister_device(struct ib_device *device); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | int ib_register_client   (struct ib_client *client); | 
					
						
							|  |  |  | void ib_unregister_client(struct ib_client *client); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void *ib_get_client_data(struct ib_device *device, struct ib_client *client); | 
					
						
							|  |  |  | void  ib_set_client_data(struct ib_device *device, struct ib_client *client, | 
					
						
							|  |  |  | 			 void *data); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-07-07 17:57:10 -07:00
										 |  |  | static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	return copy_from_user(dest, udata->inbuf, len) ? -EFAULT : 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-02-13 12:48:12 -08:00
										 |  |  | /**
 | 
					
						
							|  |  |  |  * ib_modify_qp_is_ok - Check that the supplied attribute mask | 
					
						
							|  |  |  |  * contains all required attributes and no attributes not allowed for | 
					
						
							|  |  |  |  * the given QP state transition. | 
					
						
							|  |  |  |  * @cur_state: Current QP state | 
					
						
							|  |  |  |  * @next_state: Next QP state | 
					
						
							|  |  |  |  * @type: QP type | 
					
						
							|  |  |  |  * @mask: Mask of supplied QP attributes | 
					
						
							| 
									
										
											  
											
												IB/core: Ethernet L2 attributes in verbs/cm structures
This patch adds support for Ethernet L2 attributes in the
verbs/cm/cma structures.
When dealing with L2 Ethernet, we should use smac, dmac, vlan ID and priority
in a similar manner that the IB L2 (and the L4 PKEY) attributes are used.
Thus, those attributes were added to the following structures:
* ib_ah_attr - added dmac
* ib_qp_attr - added smac and vlan_id, (sl remains vlan priority)
* ib_wc - added smac, vlan_id
* ib_sa_path_rec - added smac, dmac, vlan_id
* cm_av - added smac and vlan_id
For the path record structure, extra care was taken to avoid the new
fields when packing it into wire format, so we don't break the IB CM
and SA wire protocol.
On the active side, the CM fills its internal structures from the
path provided by the ULP.  We add there taking the ETH L2 attributes
and placing them into the CM Address Handle (struct cm_av).
On the passive side, the CM fills its internal structures from the WC
associated with the REQ message.  We add there taking the ETH L2
attributes from the WC.
When the HW driver provides the required ETH L2 attributes in the WC,
they set the IB_WC_WITH_SMAC and IB_WC_WITH_VLAN flags. The IB core
code checks for the presence of these flags, and in their absence does
address resolution from the ib_init_ah_from_wc() helper function.
ib_modify_qp_is_ok is also updated to consider the link layer. Some
parameters are mandatory for Ethernet link layer, while they are
irrelevant for IB.  Vendor drivers are modified to support the new
function signature.
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
											
										 
											2013-12-12 18:03:11 +02:00
										 |  |  |  * @ll : link layer of port | 
					
						
							| 
									
										
										
										
											2006-02-13 12:48:12 -08:00
										 |  |  |  * | 
					
						
							|  |  |  |  * This function is a helper function that a low-level driver's | 
					
						
							|  |  |  |  * modify_qp method can use to validate the consumer's input.  It | 
					
						
							|  |  |  |  * checks that cur_state and next_state are valid QP states, that a | 
					
						
							|  |  |  |  * transition from cur_state to next_state is allowed by the IB spec, | 
					
						
							|  |  |  |  * and that the attribute mask supplied is allowed for the transition. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, | 
					
						
							| 
									
										
											  
											
												IB/core: Ethernet L2 attributes in verbs/cm structures
This patch adds support for Ethernet L2 attributes in the
verbs/cm/cma structures.
When dealing with L2 Ethernet, we should use smac, dmac, vlan ID and priority
in a similar manner that the IB L2 (and the L4 PKEY) attributes are used.
Thus, those attributes were added to the following structures:
* ib_ah_attr - added dmac
* ib_qp_attr - added smac and vlan_id, (sl remains vlan priority)
* ib_wc - added smac, vlan_id
* ib_sa_path_rec - added smac, dmac, vlan_id
* cm_av - added smac and vlan_id
For the path record structure, extra care was taken to avoid the new
fields when packing it into wire format, so we don't break the IB CM
and SA wire protocol.
On the active side, the CM fills its internal structures from the
path provided by the ULP.  We add there taking the ETH L2 attributes
and placing them into the CM Address Handle (struct cm_av).
On the passive side, the CM fills its internal structures from the WC
associated with the REQ message.  We add there taking the ETH L2
attributes from the WC.
When the HW driver provides the required ETH L2 attributes in the WC,
they set the IB_WC_WITH_SMAC and IB_WC_WITH_VLAN flags. The IB core
code checks for the presence of these flags, and in their absence does
address resolution from the ib_init_ah_from_wc() helper function.
ib_modify_qp_is_ok is also updated to consider the link layer. Some
parameters are mandatory for Ethernet link layer, while they are
irrelevant for IB.  Vendor drivers are modified to support the new
function signature.
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
											
										 
											2013-12-12 18:03:11 +02:00
										 |  |  | 		       enum ib_qp_type type, enum ib_qp_attr_mask mask, | 
					
						
							|  |  |  | 		       enum rdma_link_layer ll); | 
					
						
							| 
									
										
										
										
											2006-02-13 12:48:12 -08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | int ib_register_event_handler  (struct ib_event_handler *event_handler); | 
					
						
							|  |  |  | int ib_unregister_event_handler(struct ib_event_handler *event_handler); | 
					
						
							|  |  |  | void ib_dispatch_event(struct ib_event *event); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | int ib_query_device(struct ib_device *device, | 
					
						
							|  |  |  | 		    struct ib_device_attr *device_attr); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | int ib_query_port(struct ib_device *device, | 
					
						
							|  |  |  | 		  u8 port_num, struct ib_port_attr *port_attr); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-09-27 17:51:10 -07:00
										 |  |  | enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, | 
					
						
							|  |  |  | 					       u8 port_num); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | int ib_query_gid(struct ib_device *device, | 
					
						
							|  |  |  | 		 u8 port_num, int index, union ib_gid *gid); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | int ib_query_pkey(struct ib_device *device, | 
					
						
							|  |  |  | 		  u8 port_num, u16 index, u16 *pkey); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | int ib_modify_device(struct ib_device *device, | 
					
						
							|  |  |  | 		     int device_modify_mask, | 
					
						
							|  |  |  | 		     struct ib_device_modify *device_modify); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | int ib_modify_port(struct ib_device *device, | 
					
						
							|  |  |  | 		   u8 port_num, int port_modify_mask, | 
					
						
							|  |  |  | 		   struct ib_port_modify *port_modify); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-05-14 07:26:51 +03:00
										 |  |  | int ib_find_gid(struct ib_device *device, union ib_gid *gid, | 
					
						
							|  |  |  | 		u8 *port_num, u16 *index); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | int ib_find_pkey(struct ib_device *device, | 
					
						
							|  |  |  | 		 u8 port_num, u16 pkey, u16 *index); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | /**
 | 
					
						
							|  |  |  |  * ib_alloc_pd - Allocates an unused protection domain. | 
					
						
							|  |  |  |  * @device: The device on which to allocate the protection domain. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * A protection domain object provides an association between QPs, shared | 
					
						
							|  |  |  |  * receive queues, address handles, memory regions, and memory windows. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | struct ib_pd *ib_alloc_pd(struct ib_device *device); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_dealloc_pd - Deallocates a protection domain. | 
					
						
							|  |  |  |  * @pd: The protection domain to deallocate. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | int ib_dealloc_pd(struct ib_pd *pd); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_create_ah - Creates an address handle for the given address vector. | 
					
						
							|  |  |  |  * @pd: The protection domain associated with the address handle. | 
					
						
							|  |  |  |  * @ah_attr: The attributes of the address vector. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * The address handle is used to reference a local or global destination | 
					
						
							|  |  |  |  * in all UD QP post sends. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-06-17 20:37:39 -07:00
										 |  |  | /**
 | 
					
						
							|  |  |  |  * ib_init_ah_from_wc - Initializes address handle attributes from a | 
					
						
							|  |  |  |  *   work completion. | 
					
						
							|  |  |  |  * @device: Device on which the received message arrived. | 
					
						
							|  |  |  |  * @port_num: Port on which the received message arrived. | 
					
						
							|  |  |  |  * @wc: Work completion associated with the received message. | 
					
						
							|  |  |  |  * @grh: References the received global route header.  This parameter is | 
					
						
							|  |  |  |  *   ignored unless the work completion indicates that the GRH is valid. | 
					
						
							|  |  |  |  * @ah_attr: Returned attributes that can be used when creating an address | 
					
						
							|  |  |  |  *   handle for replying to the message. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc, | 
					
						
							|  |  |  | 		       struct ib_grh *grh, struct ib_ah_attr *ah_attr); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-07-27 11:45:34 -07:00
										 |  |  | /**
 | 
					
						
							|  |  |  |  * ib_create_ah_from_wc - Creates an address handle associated with the | 
					
						
							|  |  |  |  *   sender of the specified work completion. | 
					
						
							|  |  |  |  * @pd: The protection domain associated with the address handle. | 
					
						
							|  |  |  |  * @wc: Work completion information associated with a received message. | 
					
						
							|  |  |  |  * @grh: References the received global route header.  This parameter is | 
					
						
							|  |  |  |  *   ignored unless the work completion indicates that the GRH is valid. | 
					
						
							|  |  |  |  * @port_num: The outbound port number to associate with the address. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * The address handle is used to reference a local or global destination | 
					
						
							|  |  |  |  * in all UD QP post sends. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc, | 
					
						
							|  |  |  | 				   struct ib_grh *grh, u8 port_num); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | /**
 | 
					
						
							|  |  |  |  * ib_modify_ah - Modifies the address vector associated with an address | 
					
						
							|  |  |  |  *   handle. | 
					
						
							|  |  |  |  * @ah: The address handle to modify. | 
					
						
							|  |  |  |  * @ah_attr: The new address vector attributes to associate with the | 
					
						
							|  |  |  |  *   address handle. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | int ib_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_query_ah - Queries the address vector associated with an address | 
					
						
							|  |  |  |  *   handle. | 
					
						
							|  |  |  |  * @ah: The address handle to query. | 
					
						
							|  |  |  |  * @ah_attr: The address vector attributes associated with the address | 
					
						
							|  |  |  |  *   handle. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_destroy_ah - Destroys an address handle. | 
					
						
							|  |  |  |  * @ah: The address handle to destroy. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | int ib_destroy_ah(struct ib_ah *ah); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-08-18 12:23:08 -07:00
										 |  |  | /**
 | 
					
						
							|  |  |  |  * ib_create_srq - Creates a SRQ associated with the specified protection | 
					
						
							|  |  |  |  *   domain. | 
					
						
							|  |  |  |  * @pd: The protection domain associated with the SRQ. | 
					
						
							| 
									
										
										
										
											2006-02-23 12:13:51 -08:00
										 |  |  |  * @srq_init_attr: A list of initial attributes required to create the | 
					
						
							|  |  |  |  *   SRQ.  If SRQ creation succeeds, then the attributes are updated to | 
					
						
							|  |  |  |  *   the actual capabilities of the created SRQ. | 
					
						
							| 
									
										
										
										
											2005-08-18 12:23:08 -07:00
										 |  |  |  * | 
					
						
							|  |  |  |  * srq_attr->max_wr and srq_attr->max_sge are read to determine the | 
					
						
							|  |  |  |  * requested size of the SRQ, and set to the actual values allocated | 
					
						
							|  |  |  |  * on return.  If ib_create_srq() succeeds, then max_wr and max_sge | 
					
						
							|  |  |  |  * will always be at least as large as the requested values. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | struct ib_srq *ib_create_srq(struct ib_pd *pd, | 
					
						
							|  |  |  | 			     struct ib_srq_init_attr *srq_init_attr); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_modify_srq - Modifies the attributes for the specified SRQ. | 
					
						
							|  |  |  |  * @srq: The SRQ to modify. | 
					
						
							|  |  |  |  * @srq_attr: On input, specifies the SRQ attributes to modify.  On output, | 
					
						
							|  |  |  |  *   the current values of selected SRQ attributes are returned. | 
					
						
							|  |  |  |  * @srq_attr_mask: A bit-mask used to specify which attributes of the SRQ | 
					
						
							|  |  |  |  *   are being modified. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * The mask may contain IB_SRQ_MAX_WR to resize the SRQ and/or | 
					
						
							|  |  |  |  * IB_SRQ_LIMIT to set the SRQ's limit and request notification when | 
					
						
							|  |  |  |  * the number of receives queued drops below the limit. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | int ib_modify_srq(struct ib_srq *srq, | 
					
						
							|  |  |  | 		  struct ib_srq_attr *srq_attr, | 
					
						
							|  |  |  | 		  enum ib_srq_attr_mask srq_attr_mask); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_query_srq - Returns the attribute list and current values for the | 
					
						
							|  |  |  |  *   specified SRQ. | 
					
						
							|  |  |  |  * @srq: The SRQ to query. | 
					
						
							|  |  |  |  * @srq_attr: The attributes of the specified SRQ. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | int ib_query_srq(struct ib_srq *srq, | 
					
						
							|  |  |  | 		 struct ib_srq_attr *srq_attr); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_destroy_srq - Destroys the specified SRQ. | 
					
						
							|  |  |  |  * @srq: The SRQ to destroy. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | int ib_destroy_srq(struct ib_srq *srq); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_post_srq_recv - Posts a list of work requests to the specified SRQ. | 
					
						
							|  |  |  |  * @srq: The SRQ to post the work request on. | 
					
						
							|  |  |  |  * @recv_wr: A list of work requests to post on the receive queue. | 
					
						
							|  |  |  |  * @bad_recv_wr: On an immediate failure, this parameter will reference | 
					
						
							|  |  |  |  *   the work request that failed to be posted on the QP. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static inline int ib_post_srq_recv(struct ib_srq *srq, | 
					
						
							|  |  |  | 				   struct ib_recv_wr *recv_wr, | 
					
						
							|  |  |  | 				   struct ib_recv_wr **bad_recv_wr) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	return srq->device->post_srq_recv(srq, recv_wr, bad_recv_wr); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | /**
 | 
					
						
							|  |  |  |  * ib_create_qp - Creates a QP associated with the specified protection | 
					
						
							|  |  |  |  *   domain. | 
					
						
							|  |  |  |  * @pd: The protection domain associated with the QP. | 
					
						
							| 
									
										
										
										
											2006-02-23 12:13:51 -08:00
										 |  |  |  * @qp_init_attr: A list of initial attributes required to create the | 
					
						
							|  |  |  |  *   QP.  If QP creation succeeds, then the attributes are updated to | 
					
						
							|  |  |  |  *   the actual capabilities of the created QP. | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  |  */ | 
					
						
							|  |  |  | struct ib_qp *ib_create_qp(struct ib_pd *pd, | 
					
						
							|  |  |  | 			   struct ib_qp_init_attr *qp_init_attr); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_modify_qp - Modifies the attributes for the specified QP and then | 
					
						
							|  |  |  |  *   transitions the QP to the given state. | 
					
						
							|  |  |  |  * @qp: The QP to modify. | 
					
						
							|  |  |  |  * @qp_attr: On input, specifies the QP attributes to modify.  On output, | 
					
						
							|  |  |  |  *   the current values of selected QP attributes are returned. | 
					
						
							|  |  |  |  * @qp_attr_mask: A bit-mask used to specify which attributes of the QP | 
					
						
							|  |  |  |  *   are being modified. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | int ib_modify_qp(struct ib_qp *qp, | 
					
						
							|  |  |  | 		 struct ib_qp_attr *qp_attr, | 
					
						
							|  |  |  | 		 int qp_attr_mask); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_query_qp - Returns the attribute list and current values for the | 
					
						
							|  |  |  |  *   specified QP. | 
					
						
							|  |  |  |  * @qp: The QP to query. | 
					
						
							|  |  |  |  * @qp_attr: The attributes of the specified QP. | 
					
						
							|  |  |  |  * @qp_attr_mask: A bit-mask used to select specific attributes to query. | 
					
						
							|  |  |  |  * @qp_init_attr: Additional attributes of the selected QP. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * The qp_attr_mask may be used to limit the query to gathering only the | 
					
						
							|  |  |  |  * selected attributes. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | int ib_query_qp(struct ib_qp *qp, | 
					
						
							|  |  |  | 		struct ib_qp_attr *qp_attr, | 
					
						
							|  |  |  | 		int qp_attr_mask, | 
					
						
							|  |  |  | 		struct ib_qp_init_attr *qp_init_attr); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_destroy_qp - Destroys the specified QP. | 
					
						
							|  |  |  |  * @qp: The QP to destroy. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | int ib_destroy_qp(struct ib_qp *qp); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-05-26 23:06:44 -07:00
										 |  |  | /**
 | 
					
						
							| 
									
										
										
										
											2011-08-08 15:31:51 -07:00
										 |  |  |  * ib_open_qp - Obtain a reference to an existing sharable QP. | 
					
						
							|  |  |  |  * @xrcd: XRC domain | 
					
						
							|  |  |  |  * @qp_open_attr: Attributes identifying the QP to open. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Returns a reference to a sharable QP. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd, | 
					
						
							|  |  |  | 			 struct ib_qp_open_attr *qp_open_attr); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_close_qp - Release an external reference to a QP. | 
					
						
							| 
									
										
										
										
											2011-05-26 23:06:44 -07:00
										 |  |  |  * @qp: The QP handle to release | 
					
						
							|  |  |  |  * | 
					
						
							| 
									
										
										
										
											2011-08-08 15:31:51 -07:00
										 |  |  |  * The opened QP handle is released by the caller.  The underlying | 
					
						
							|  |  |  |  * shared QP is not destroyed until all internal references are released. | 
					
						
							| 
									
										
										
										
											2011-05-26 23:06:44 -07:00
										 |  |  |  */ | 
					
						
							| 
									
										
										
										
											2011-08-08 15:31:51 -07:00
										 |  |  | int ib_close_qp(struct ib_qp *qp); | 
					
						
							| 
									
										
										
										
											2011-05-26 23:06:44 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | /**
 | 
					
						
							|  |  |  |  * ib_post_send - Posts a list of work requests to the send queue of | 
					
						
							|  |  |  |  *   the specified QP. | 
					
						
							|  |  |  |  * @qp: The QP to post the work request on. | 
					
						
							|  |  |  |  * @send_wr: A list of work requests to post on the send queue. | 
					
						
							|  |  |  |  * @bad_send_wr: On an immediate failure, this parameter will reference | 
					
						
							|  |  |  |  *   the work request that failed to be posted on the QP. | 
					
						
							| 
									
										
										
										
											2009-12-09 14:20:04 -08:00
										 |  |  |  * | 
					
						
							|  |  |  |  * While IBA Vol. 1 section 11.4.1.1 specifies that if an immediate | 
					
						
							|  |  |  |  * error is returned, the QP state shall not be affected, | 
					
						
							|  |  |  |  * ib_post_send() will return an immediate error after queueing any | 
					
						
							|  |  |  |  * earlier work requests in the list. | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  |  */ | 
					
						
							|  |  |  | static inline int ib_post_send(struct ib_qp *qp, | 
					
						
							|  |  |  | 			       struct ib_send_wr *send_wr, | 
					
						
							|  |  |  | 			       struct ib_send_wr **bad_send_wr) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	return qp->device->post_send(qp, send_wr, bad_send_wr); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_post_recv - Posts a list of work requests to the receive queue of | 
					
						
							|  |  |  |  *   the specified QP. | 
					
						
							|  |  |  |  * @qp: The QP to post the work request on. | 
					
						
							|  |  |  |  * @recv_wr: A list of work requests to post on the receive queue. | 
					
						
							|  |  |  |  * @bad_recv_wr: On an immediate failure, this parameter will reference | 
					
						
							|  |  |  |  *   the work request that failed to be posted on the QP. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static inline int ib_post_recv(struct ib_qp *qp, | 
					
						
							|  |  |  | 			       struct ib_recv_wr *recv_wr, | 
					
						
							|  |  |  | 			       struct ib_recv_wr **bad_recv_wr) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	return qp->device->post_recv(qp, recv_wr, bad_recv_wr); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_create_cq - Creates a CQ on the specified device. | 
					
						
							|  |  |  |  * @device: The device on which to create the CQ. | 
					
						
							|  |  |  |  * @comp_handler: A user-specified callback that is invoked when a | 
					
						
							|  |  |  |  *   completion event occurs on the CQ. | 
					
						
							|  |  |  |  * @event_handler: A user-specified callback that is invoked when an | 
					
						
							|  |  |  |  *   asynchronous event not associated with a completion occurs on the CQ. | 
					
						
							|  |  |  |  * @cq_context: Context associated with the CQ returned to the user via | 
					
						
							|  |  |  |  *   the associated completion and event handlers. | 
					
						
							|  |  |  |  * @cqe: The minimum size of the CQ. | 
					
						
							| 
									
										
										
										
											2007-05-03 13:48:47 +03:00
										 |  |  |  * @comp_vector: Completion vector used to signal completion events. | 
					
						
							|  |  |  |  *     Must be >= 0 and < context->num_comp_vectors. | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  |  * | 
					
						
							|  |  |  |  * Users can examine the cq structure to determine the actual CQ size. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | struct ib_cq *ib_create_cq(struct ib_device *device, | 
					
						
							|  |  |  | 			   ib_comp_handler comp_handler, | 
					
						
							|  |  |  | 			   void (*event_handler)(struct ib_event *, void *), | 
					
						
							| 
									
										
										
										
											2007-05-03 13:48:47 +03:00
										 |  |  | 			   void *cq_context, int cqe, int comp_vector); | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_resize_cq - Modifies the capacity of the CQ. | 
					
						
							|  |  |  |  * @cq: The CQ to resize. | 
					
						
							|  |  |  |  * @cqe: The minimum size of the CQ. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Users can examine the cq structure to determine the actual CQ size. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | int ib_resize_cq(struct ib_cq *cq, int cqe); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-04-16 21:09:33 -07:00
										 |  |  | /**
 | 
					
						
							|  |  |  |  * ib_modify_cq - Modifies moderation params of the CQ | 
					
						
							|  |  |  |  * @cq: The CQ to modify. | 
					
						
							|  |  |  |  * @cq_count: number of CQEs that will trigger an event | 
					
						
							|  |  |  |  * @cq_period: max period of time in usec before triggering an event | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | int ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | /**
 | 
					
						
							|  |  |  |  * ib_destroy_cq - Destroys the specified CQ. | 
					
						
							|  |  |  |  * @cq: The CQ to destroy. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | int ib_destroy_cq(struct ib_cq *cq); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_poll_cq - poll a CQ for completion(s) | 
					
						
							|  |  |  |  * @cq:the CQ being polled | 
					
						
							|  |  |  |  * @num_entries:maximum number of completions to return | 
					
						
							|  |  |  |  * @wc:array of at least @num_entries &struct ib_wc where completions | 
					
						
							|  |  |  |  *   will be returned | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Poll a CQ for (possibly multiple) completions.  If the return value | 
					
						
							|  |  |  |  * is < 0, an error occurred.  If the return value is >= 0, it is the | 
					
						
							|  |  |  |  * number of completions returned.  If the return value is | 
					
						
							|  |  |  |  * non-negative and < num_entries, then the CQ was emptied. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static inline int ib_poll_cq(struct ib_cq *cq, int num_entries, | 
					
						
							|  |  |  | 			     struct ib_wc *wc) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	return cq->device->poll_cq(cq, num_entries, wc); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_peek_cq - Returns the number of unreaped completions currently | 
					
						
							|  |  |  |  *   on the specified CQ. | 
					
						
							|  |  |  |  * @cq: The CQ to peek. | 
					
						
							|  |  |  |  * @wc_cnt: A minimum number of unreaped completions to check for. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * If the number of unreaped completions is greater than or equal to wc_cnt, | 
					
						
							|  |  |  |  * this function returns wc_cnt, otherwise, it returns the actual number of | 
					
						
							|  |  |  |  * unreaped completions. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | int ib_peek_cq(struct ib_cq *cq, int wc_cnt); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_req_notify_cq - Request completion notification on a CQ. | 
					
						
							|  |  |  |  * @cq: The CQ to generate an event for. | 
					
						
							| 
									
										
											  
											
												IB: Return "maybe missed event" hint from ib_req_notify_cq()
The semantics defined by the InfiniBand specification say that
completion events are only generated when a completions is added to a
completion queue (CQ) after completion notification is requested.  In
other words, this means that the following race is possible:
	while (CQ is not empty)
		ib_poll_cq(CQ);
	// new completion is added after while loop is exited
	ib_req_notify_cq(CQ);
	// no event is generated for the existing completion
To close this race, the IB spec recommends doing another poll of the
CQ after requesting notification.
However, it is not always possible to arrange code this way (for
example, we have found that NAPI for IPoIB cannot poll after
requesting notification).  Also, some hardware (eg Mellanox HCAs)
actually will generate an event for completions added before the call
to ib_req_notify_cq() -- which is allowed by the spec, since there's
no way for any upper-layer consumer to know exactly when a completion
was really added -- so the extra poll of the CQ is just a waste.
Motivated by this, we add a new flag "IB_CQ_REPORT_MISSED_EVENTS" for
ib_req_notify_cq() so that it can return a hint about whether the a
completion may have been added before the request for notification.
The return value of ib_req_notify_cq() is extended so:
	 < 0	means an error occurred while requesting notification
	== 0	means notification was requested successfully, and if
		IB_CQ_REPORT_MISSED_EVENTS was passed in, then no
		events were missed and it is safe to wait for another
		event.
	 > 0	is only returned if IB_CQ_REPORT_MISSED_EVENTS was
		passed in.  It means that the consumer must poll the
		CQ again to make sure it is empty to avoid the race
		described above.
We add a flag to enable this behavior rather than turning it on
unconditionally, because checking for missed events may incur
significant overhead for some low-level drivers, and consumers that
don't care about the results of this test shouldn't be forced to pay
for the test.
Signed-off-by: Roland Dreier <rolandd@cisco.com>
											
										 
											2007-05-06 21:02:48 -07:00
										 |  |  |  * @flags: | 
					
						
							|  |  |  |  *   Must contain exactly one of %IB_CQ_SOLICITED or %IB_CQ_NEXT_COMP | 
					
						
							|  |  |  |  *   to request an event on the next solicited event or next work | 
					
						
							|  |  |  |  *   completion at any type, respectively. %IB_CQ_REPORT_MISSED_EVENTS | 
					
						
							|  |  |  |  *   may also be |ed in to request a hint about missed events, as | 
					
						
							|  |  |  |  *   described below. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Return Value: | 
					
						
							|  |  |  |  *    < 0 means an error occurred while requesting notification | 
					
						
							|  |  |  |  *   == 0 means notification was requested successfully, and if | 
					
						
							|  |  |  |  *        IB_CQ_REPORT_MISSED_EVENTS was passed in, then no events | 
					
						
							|  |  |  |  *        were missed and it is safe to wait for another event.  In | 
					
						
							|  |  |  |  *        this case is it guaranteed that any work completions added | 
					
						
							|  |  |  |  *        to the CQ since the last CQ poll will trigger a completion | 
					
						
							|  |  |  |  *        notification event. | 
					
						
							|  |  |  |  *    > 0 is only returned if IB_CQ_REPORT_MISSED_EVENTS was passed | 
					
						
							|  |  |  |  *        in.  It means that the consumer must poll the CQ again to | 
					
						
							|  |  |  |  *        make sure it is empty to avoid missing an event because of a | 
					
						
							|  |  |  |  *        race between requesting notification and an entry being | 
					
						
							|  |  |  |  *        added to the CQ.  This return value means it is possible | 
					
						
							|  |  |  |  *        (but not guaranteed) that a work completion has been added | 
					
						
							|  |  |  |  *        to the CQ since the last poll without triggering a | 
					
						
							|  |  |  |  *        completion notification event. | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  |  */ | 
					
						
							|  |  |  | static inline int ib_req_notify_cq(struct ib_cq *cq, | 
					
						
							| 
									
										
											  
											
												IB: Return "maybe missed event" hint from ib_req_notify_cq()
The semantics defined by the InfiniBand specification say that
completion events are only generated when a completions is added to a
completion queue (CQ) after completion notification is requested.  In
other words, this means that the following race is possible:
	while (CQ is not empty)
		ib_poll_cq(CQ);
	// new completion is added after while loop is exited
	ib_req_notify_cq(CQ);
	// no event is generated for the existing completion
To close this race, the IB spec recommends doing another poll of the
CQ after requesting notification.
However, it is not always possible to arrange code this way (for
example, we have found that NAPI for IPoIB cannot poll after
requesting notification).  Also, some hardware (eg Mellanox HCAs)
actually will generate an event for completions added before the call
to ib_req_notify_cq() -- which is allowed by the spec, since there's
no way for any upper-layer consumer to know exactly when a completion
was really added -- so the extra poll of the CQ is just a waste.
Motivated by this, we add a new flag "IB_CQ_REPORT_MISSED_EVENTS" for
ib_req_notify_cq() so that it can return a hint about whether the a
completion may have been added before the request for notification.
The return value of ib_req_notify_cq() is extended so:
	 < 0	means an error occurred while requesting notification
	== 0	means notification was requested successfully, and if
		IB_CQ_REPORT_MISSED_EVENTS was passed in, then no
		events were missed and it is safe to wait for another
		event.
	 > 0	is only returned if IB_CQ_REPORT_MISSED_EVENTS was
		passed in.  It means that the consumer must poll the
		CQ again to make sure it is empty to avoid the race
		described above.
We add a flag to enable this behavior rather than turning it on
unconditionally, because checking for missed events may incur
significant overhead for some low-level drivers, and consumers that
don't care about the results of this test shouldn't be forced to pay
for the test.
Signed-off-by: Roland Dreier <rolandd@cisco.com>
											
										 
											2007-05-06 21:02:48 -07:00
										 |  |  | 				   enum ib_cq_notify_flags flags) | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | { | 
					
						
							| 
									
										
											  
											
												IB: Return "maybe missed event" hint from ib_req_notify_cq()
The semantics defined by the InfiniBand specification say that
completion events are only generated when a completions is added to a
completion queue (CQ) after completion notification is requested.  In
other words, this means that the following race is possible:
	while (CQ is not empty)
		ib_poll_cq(CQ);
	// new completion is added after while loop is exited
	ib_req_notify_cq(CQ);
	// no event is generated for the existing completion
To close this race, the IB spec recommends doing another poll of the
CQ after requesting notification.
However, it is not always possible to arrange code this way (for
example, we have found that NAPI for IPoIB cannot poll after
requesting notification).  Also, some hardware (eg Mellanox HCAs)
actually will generate an event for completions added before the call
to ib_req_notify_cq() -- which is allowed by the spec, since there's
no way for any upper-layer consumer to know exactly when a completion
was really added -- so the extra poll of the CQ is just a waste.
Motivated by this, we add a new flag "IB_CQ_REPORT_MISSED_EVENTS" for
ib_req_notify_cq() so that it can return a hint about whether the a
completion may have been added before the request for notification.
The return value of ib_req_notify_cq() is extended so:
	 < 0	means an error occurred while requesting notification
	== 0	means notification was requested successfully, and if
		IB_CQ_REPORT_MISSED_EVENTS was passed in, then no
		events were missed and it is safe to wait for another
		event.
	 > 0	is only returned if IB_CQ_REPORT_MISSED_EVENTS was
		passed in.  It means that the consumer must poll the
		CQ again to make sure it is empty to avoid the race
		described above.
We add a flag to enable this behavior rather than turning it on
unconditionally, because checking for missed events may incur
significant overhead for some low-level drivers, and consumers that
don't care about the results of this test shouldn't be forced to pay
for the test.
Signed-off-by: Roland Dreier <rolandd@cisco.com>
											
										 
											2007-05-06 21:02:48 -07:00
										 |  |  | 	return cq->device->req_notify_cq(cq, flags); | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_req_ncomp_notif - Request completion notification when there are | 
					
						
							|  |  |  |  *   at least the specified number of unreaped completions on the CQ. | 
					
						
							|  |  |  |  * @cq: The CQ to generate an event for. | 
					
						
							|  |  |  |  * @wc_cnt: The number of unreaped completions that should be on the | 
					
						
							|  |  |  |  *   CQ before an event is generated. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	return cq->device->req_ncomp_notif ? | 
					
						
							|  |  |  | 		cq->device->req_ncomp_notif(cq, wc_cnt) : | 
					
						
							|  |  |  | 		-ENOSYS; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_get_dma_mr - Returns a memory region for system memory that is | 
					
						
							|  |  |  |  *   usable for DMA. | 
					
						
							|  |  |  |  * @pd: The protection domain associated with the memory region. | 
					
						
							|  |  |  |  * @mr_access_flags: Specifies the memory access rights. | 
					
						
							| 
									
										
										
										
											2006-12-12 14:27:41 -08:00
										 |  |  |  * | 
					
						
							|  |  |  |  * Note that the ib_dma_*() functions defined below must be used | 
					
						
							|  |  |  |  * to create/destroy addresses used with the Lkey or Rkey returned | 
					
						
							|  |  |  |  * by ib_get_dma_mr(). | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  |  */ | 
					
						
							|  |  |  | struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-12-12 14:27:41 -08:00
										 |  |  | /**
 | 
					
						
							|  |  |  |  * ib_dma_mapping_error - check a DMA addr for error | 
					
						
							|  |  |  |  * @dev: The device for which the dma_addr was created | 
					
						
							|  |  |  |  * @dma_addr: The DMA address to check | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2006-12-13 22:10:05 -05:00
										 |  |  | 	if (dev->dma_ops) | 
					
						
							|  |  |  | 		return dev->dma_ops->mapping_error(dev, dma_addr); | 
					
						
							| 
									
										
										
										
											2008-07-25 19:44:49 -07:00
										 |  |  | 	return dma_mapping_error(dev->dma_device, dma_addr); | 
					
						
							| 
									
										
										
										
											2006-12-12 14:27:41 -08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_dma_map_single - Map a kernel virtual address to DMA address | 
					
						
							|  |  |  |  * @dev: The device for which the dma_addr is to be created | 
					
						
							|  |  |  |  * @cpu_addr: The kernel virtual address | 
					
						
							|  |  |  |  * @size: The size of the region in bytes | 
					
						
							|  |  |  |  * @direction: The direction of the DMA | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static inline u64 ib_dma_map_single(struct ib_device *dev, | 
					
						
							|  |  |  | 				    void *cpu_addr, size_t size, | 
					
						
							|  |  |  | 				    enum dma_data_direction direction) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2006-12-13 22:10:05 -05:00
										 |  |  | 	if (dev->dma_ops) | 
					
						
							|  |  |  | 		return dev->dma_ops->map_single(dev, cpu_addr, size, direction); | 
					
						
							|  |  |  | 	return dma_map_single(dev->dma_device, cpu_addr, size, direction); | 
					
						
							| 
									
										
										
										
											2006-12-12 14:27:41 -08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_dma_unmap_single - Destroy a mapping created by ib_dma_map_single() | 
					
						
							|  |  |  |  * @dev: The device for which the DMA address was created | 
					
						
							|  |  |  |  * @addr: The DMA address | 
					
						
							|  |  |  |  * @size: The size of the region in bytes | 
					
						
							|  |  |  |  * @direction: The direction of the DMA | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static inline void ib_dma_unmap_single(struct ib_device *dev, | 
					
						
							|  |  |  | 				       u64 addr, size_t size, | 
					
						
							|  |  |  | 				       enum dma_data_direction direction) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2006-12-13 22:10:05 -05:00
										 |  |  | 	if (dev->dma_ops) | 
					
						
							|  |  |  | 		dev->dma_ops->unmap_single(dev, addr, size, direction); | 
					
						
							|  |  |  | 	else | 
					
						
							| 
									
										
										
										
											2006-12-12 14:27:41 -08:00
										 |  |  | 		dma_unmap_single(dev->dma_device, addr, size, direction); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-04-29 01:00:34 -07:00
										 |  |  | static inline u64 ib_dma_map_single_attrs(struct ib_device *dev, | 
					
						
							|  |  |  | 					  void *cpu_addr, size_t size, | 
					
						
							|  |  |  | 					  enum dma_data_direction direction, | 
					
						
							|  |  |  | 					  struct dma_attrs *attrs) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	return dma_map_single_attrs(dev->dma_device, cpu_addr, size, | 
					
						
							|  |  |  | 				    direction, attrs); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline void ib_dma_unmap_single_attrs(struct ib_device *dev, | 
					
						
							|  |  |  | 					     u64 addr, size_t size, | 
					
						
							|  |  |  | 					     enum dma_data_direction direction, | 
					
						
							|  |  |  | 					     struct dma_attrs *attrs) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	return dma_unmap_single_attrs(dev->dma_device, addr, size, | 
					
						
							|  |  |  | 				      direction, attrs); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-12-12 14:27:41 -08:00
										 |  |  | /**
 | 
					
						
							|  |  |  |  * ib_dma_map_page - Map a physical page to DMA address | 
					
						
							|  |  |  |  * @dev: The device for which the dma_addr is to be created | 
					
						
							|  |  |  |  * @page: The page to be mapped | 
					
						
							|  |  |  |  * @offset: The offset within the page | 
					
						
							|  |  |  |  * @size: The size of the region in bytes | 
					
						
							|  |  |  |  * @direction: The direction of the DMA | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static inline u64 ib_dma_map_page(struct ib_device *dev, | 
					
						
							|  |  |  | 				  struct page *page, | 
					
						
							|  |  |  | 				  unsigned long offset, | 
					
						
							|  |  |  | 				  size_t size, | 
					
						
							|  |  |  | 					 enum dma_data_direction direction) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2006-12-13 22:10:05 -05:00
										 |  |  | 	if (dev->dma_ops) | 
					
						
							|  |  |  | 		return dev->dma_ops->map_page(dev, page, offset, size, direction); | 
					
						
							|  |  |  | 	return dma_map_page(dev->dma_device, page, offset, size, direction); | 
					
						
							| 
									
										
										
										
											2006-12-12 14:27:41 -08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_dma_unmap_page - Destroy a mapping created by ib_dma_map_page() | 
					
						
							|  |  |  |  * @dev: The device for which the DMA address was created | 
					
						
							|  |  |  |  * @addr: The DMA address | 
					
						
							|  |  |  |  * @size: The size of the region in bytes | 
					
						
							|  |  |  |  * @direction: The direction of the DMA | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static inline void ib_dma_unmap_page(struct ib_device *dev, | 
					
						
							|  |  |  | 				     u64 addr, size_t size, | 
					
						
							|  |  |  | 				     enum dma_data_direction direction) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2006-12-13 22:10:05 -05:00
										 |  |  | 	if (dev->dma_ops) | 
					
						
							|  |  |  | 		dev->dma_ops->unmap_page(dev, addr, size, direction); | 
					
						
							|  |  |  | 	else | 
					
						
							| 
									
										
										
										
											2006-12-12 14:27:41 -08:00
										 |  |  | 		dma_unmap_page(dev->dma_device, addr, size, direction); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_dma_map_sg - Map a scatter/gather list to DMA addresses | 
					
						
							|  |  |  |  * @dev: The device for which the DMA addresses are to be created | 
					
						
							|  |  |  |  * @sg: The array of scatter/gather entries | 
					
						
							|  |  |  |  * @nents: The number of scatter/gather entries | 
					
						
							|  |  |  |  * @direction: The direction of the DMA | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static inline int ib_dma_map_sg(struct ib_device *dev, | 
					
						
							|  |  |  | 				struct scatterlist *sg, int nents, | 
					
						
							|  |  |  | 				enum dma_data_direction direction) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2006-12-13 22:10:05 -05:00
										 |  |  | 	if (dev->dma_ops) | 
					
						
							|  |  |  | 		return dev->dma_ops->map_sg(dev, sg, nents, direction); | 
					
						
							|  |  |  | 	return dma_map_sg(dev->dma_device, sg, nents, direction); | 
					
						
							| 
									
										
										
										
											2006-12-12 14:27:41 -08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_dma_unmap_sg - Unmap a scatter/gather list of DMA addresses | 
					
						
							|  |  |  |  * @dev: The device for which the DMA addresses were created | 
					
						
							|  |  |  |  * @sg: The array of scatter/gather entries | 
					
						
							|  |  |  |  * @nents: The number of scatter/gather entries | 
					
						
							|  |  |  |  * @direction: The direction of the DMA | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static inline void ib_dma_unmap_sg(struct ib_device *dev, | 
					
						
							|  |  |  | 				   struct scatterlist *sg, int nents, | 
					
						
							|  |  |  | 				   enum dma_data_direction direction) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2006-12-13 22:10:05 -05:00
										 |  |  | 	if (dev->dma_ops) | 
					
						
							|  |  |  | 		dev->dma_ops->unmap_sg(dev, sg, nents, direction); | 
					
						
							|  |  |  | 	else | 
					
						
							| 
									
										
										
										
											2006-12-12 14:27:41 -08:00
										 |  |  | 		dma_unmap_sg(dev->dma_device, sg, nents, direction); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-04-29 01:00:34 -07:00
										 |  |  | static inline int ib_dma_map_sg_attrs(struct ib_device *dev, | 
					
						
							|  |  |  | 				      struct scatterlist *sg, int nents, | 
					
						
							|  |  |  | 				      enum dma_data_direction direction, | 
					
						
							|  |  |  | 				      struct dma_attrs *attrs) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	return dma_map_sg_attrs(dev->dma_device, sg, nents, direction, attrs); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, | 
					
						
							|  |  |  | 					 struct scatterlist *sg, int nents, | 
					
						
							|  |  |  | 					 enum dma_data_direction direction, | 
					
						
							|  |  |  | 					 struct dma_attrs *attrs) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, attrs); | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2006-12-12 14:27:41 -08:00
										 |  |  | /**
 | 
					
						
							|  |  |  |  * ib_sg_dma_address - Return the DMA address from a scatter/gather entry | 
					
						
							|  |  |  |  * @dev: The device for which the DMA addresses were created | 
					
						
							|  |  |  |  * @sg: The scatter/gather entry | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static inline u64 ib_sg_dma_address(struct ib_device *dev, | 
					
						
							|  |  |  | 				    struct scatterlist *sg) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2006-12-13 22:10:05 -05:00
										 |  |  | 	if (dev->dma_ops) | 
					
						
							|  |  |  | 		return dev->dma_ops->dma_address(dev, sg); | 
					
						
							|  |  |  | 	return sg_dma_address(sg); | 
					
						
							| 
									
										
										
										
											2006-12-12 14:27:41 -08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_sg_dma_len - Return the DMA length from a scatter/gather entry | 
					
						
							|  |  |  |  * @dev: The device for which the DMA addresses were created | 
					
						
							|  |  |  |  * @sg: The scatter/gather entry | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static inline unsigned int ib_sg_dma_len(struct ib_device *dev, | 
					
						
							|  |  |  | 					 struct scatterlist *sg) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2006-12-13 22:10:05 -05:00
										 |  |  | 	if (dev->dma_ops) | 
					
						
							|  |  |  | 		return dev->dma_ops->dma_len(dev, sg); | 
					
						
							|  |  |  | 	return sg_dma_len(sg); | 
					
						
							| 
									
										
										
										
											2006-12-12 14:27:41 -08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_dma_sync_single_for_cpu - Prepare DMA region to be accessed by CPU | 
					
						
							|  |  |  |  * @dev: The device for which the DMA address was created | 
					
						
							|  |  |  |  * @addr: The DMA address | 
					
						
							|  |  |  |  * @size: The size of the region in bytes | 
					
						
							|  |  |  |  * @dir: The direction of the DMA | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static inline void ib_dma_sync_single_for_cpu(struct ib_device *dev, | 
					
						
							|  |  |  | 					      u64 addr, | 
					
						
							|  |  |  | 					      size_t size, | 
					
						
							|  |  |  | 					      enum dma_data_direction dir) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2006-12-13 22:10:05 -05:00
										 |  |  | 	if (dev->dma_ops) | 
					
						
							|  |  |  | 		dev->dma_ops->sync_single_for_cpu(dev, addr, size, dir); | 
					
						
							|  |  |  | 	else | 
					
						
							| 
									
										
										
										
											2006-12-12 14:27:41 -08:00
										 |  |  | 		dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_dma_sync_single_for_device - Prepare DMA region to be accessed by device | 
					
						
							|  |  |  |  * @dev: The device for which the DMA address was created | 
					
						
							|  |  |  |  * @addr: The DMA address | 
					
						
							|  |  |  |  * @size: The size of the region in bytes | 
					
						
							|  |  |  |  * @dir: The direction of the DMA | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static inline void ib_dma_sync_single_for_device(struct ib_device *dev, | 
					
						
							|  |  |  | 						 u64 addr, | 
					
						
							|  |  |  | 						 size_t size, | 
					
						
							|  |  |  | 						 enum dma_data_direction dir) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2006-12-13 22:10:05 -05:00
										 |  |  | 	if (dev->dma_ops) | 
					
						
							|  |  |  | 		dev->dma_ops->sync_single_for_device(dev, addr, size, dir); | 
					
						
							|  |  |  | 	else | 
					
						
							| 
									
										
										
										
											2006-12-12 14:27:41 -08:00
										 |  |  | 		dma_sync_single_for_device(dev->dma_device, addr, size, dir); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_dma_alloc_coherent - Allocate memory and map it for DMA | 
					
						
							|  |  |  |  * @dev: The device for which the DMA address is requested | 
					
						
							|  |  |  |  * @size: The size of the region to allocate in bytes | 
					
						
							|  |  |  |  * @dma_handle: A pointer for returning the DMA address of the region | 
					
						
							|  |  |  |  * @flag: memory allocator flags | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static inline void *ib_dma_alloc_coherent(struct ib_device *dev, | 
					
						
							|  |  |  | 					   size_t size, | 
					
						
							|  |  |  | 					   u64 *dma_handle, | 
					
						
							|  |  |  | 					   gfp_t flag) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2006-12-13 22:10:05 -05:00
										 |  |  | 	if (dev->dma_ops) | 
					
						
							|  |  |  | 		return dev->dma_ops->alloc_coherent(dev, size, dma_handle, flag); | 
					
						
							| 
									
										
										
										
											2006-12-15 13:57:26 -08:00
										 |  |  | 	else { | 
					
						
							|  |  |  | 		dma_addr_t handle; | 
					
						
							|  |  |  | 		void *ret; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		ret = dma_alloc_coherent(dev->dma_device, size, &handle, flag); | 
					
						
							|  |  |  | 		*dma_handle = handle; | 
					
						
							|  |  |  | 		return ret; | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2006-12-12 14:27:41 -08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_dma_free_coherent - Free memory allocated by ib_dma_alloc_coherent() | 
					
						
							|  |  |  |  * @dev: The device for which the DMA addresses were allocated | 
					
						
							|  |  |  |  * @size: The size of the region | 
					
						
							|  |  |  |  * @cpu_addr: the address returned by ib_dma_alloc_coherent() | 
					
						
							|  |  |  |  * @dma_handle: the DMA address returned by ib_dma_alloc_coherent() | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static inline void ib_dma_free_coherent(struct ib_device *dev, | 
					
						
							|  |  |  | 					size_t size, void *cpu_addr, | 
					
						
							|  |  |  | 					u64 dma_handle) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2006-12-13 22:10:05 -05:00
										 |  |  | 	if (dev->dma_ops) | 
					
						
							|  |  |  | 		dev->dma_ops->free_coherent(dev, size, cpu_addr, dma_handle); | 
					
						
							|  |  |  | 	else | 
					
						
							| 
									
										
										
										
											2006-12-12 14:27:41 -08:00
										 |  |  | 		dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | /**
 | 
					
						
							|  |  |  |  * ib_reg_phys_mr - Prepares a virtually addressed memory region for use | 
					
						
							|  |  |  |  *   by an HCA. | 
					
						
							|  |  |  |  * @pd: The protection domain assigned to the registered region. | 
					
						
							|  |  |  |  * @phys_buf_array: Specifies a list of physical buffers to use in the | 
					
						
							|  |  |  |  *   memory region. | 
					
						
							|  |  |  |  * @num_phys_buf: Specifies the size of the phys_buf_array. | 
					
						
							|  |  |  |  * @mr_access_flags: Specifies the memory access rights. | 
					
						
							|  |  |  |  * @iova_start: The offset of the region's starting I/O virtual address. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd, | 
					
						
							|  |  |  | 			     struct ib_phys_buf *phys_buf_array, | 
					
						
							|  |  |  | 			     int num_phys_buf, | 
					
						
							|  |  |  | 			     int mr_access_flags, | 
					
						
							|  |  |  | 			     u64 *iova_start); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_rereg_phys_mr - Modifies the attributes of an existing memory region. | 
					
						
							|  |  |  |  *   Conceptually, this call performs the functions deregister memory region | 
					
						
							|  |  |  |  *   followed by register physical memory region.  Where possible, | 
					
						
							|  |  |  |  *   resources are reused instead of deallocated and reallocated. | 
					
						
							|  |  |  |  * @mr: The memory region to modify. | 
					
						
							|  |  |  |  * @mr_rereg_mask: A bit-mask used to indicate which of the following | 
					
						
							|  |  |  |  *   properties of the memory region are being modified. | 
					
						
							|  |  |  |  * @pd: If %IB_MR_REREG_PD is set in mr_rereg_mask, this field specifies | 
					
						
							|  |  |  |  *   the new protection domain to associate with the memory region, | 
					
						
							|  |  |  |  *   otherwise, this parameter is ignored. | 
					
						
							|  |  |  |  * @phys_buf_array: If %IB_MR_REREG_TRANS is set in mr_rereg_mask, this | 
					
						
							|  |  |  |  *   field specifies a list of physical buffers to use in the new | 
					
						
							|  |  |  |  *   translation, otherwise, this parameter is ignored. | 
					
						
							|  |  |  |  * @num_phys_buf: If %IB_MR_REREG_TRANS is set in mr_rereg_mask, this | 
					
						
							|  |  |  |  *   field specifies the size of the phys_buf_array, otherwise, this | 
					
						
							|  |  |  |  *   parameter is ignored. | 
					
						
							|  |  |  |  * @mr_access_flags: If %IB_MR_REREG_ACCESS is set in mr_rereg_mask, this | 
					
						
							|  |  |  |  *   field specifies the new memory access rights, otherwise, this | 
					
						
							|  |  |  |  *   parameter is ignored. | 
					
						
							|  |  |  |  * @iova_start: The offset of the region's starting I/O virtual address. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | int ib_rereg_phys_mr(struct ib_mr *mr, | 
					
						
							|  |  |  | 		     int mr_rereg_mask, | 
					
						
							|  |  |  | 		     struct ib_pd *pd, | 
					
						
							|  |  |  | 		     struct ib_phys_buf *phys_buf_array, | 
					
						
							|  |  |  | 		     int num_phys_buf, | 
					
						
							|  |  |  | 		     int mr_access_flags, | 
					
						
							|  |  |  | 		     u64 *iova_start); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_query_mr - Retrieves information about a specific memory region. | 
					
						
							|  |  |  |  * @mr: The memory region to retrieve information about. | 
					
						
							|  |  |  |  * @mr_attr: The attributes of the specified memory region. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_dereg_mr - Deregisters a memory region and removes it from the | 
					
						
							|  |  |  |  *   HCA translation table. | 
					
						
							|  |  |  |  * @mr: The memory region to deregister. | 
					
						
							| 
									
										
										
										
											2013-02-06 16:19:12 +00:00
										 |  |  |  * | 
					
						
							|  |  |  |  * This function can fail, if the memory region has memory windows bound to it. | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  |  */ | 
					
						
							|  |  |  | int ib_dereg_mr(struct ib_mr *mr); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-07-14 23:48:45 -07:00
										 |  |  | /**
 | 
					
						
							|  |  |  |  * ib_alloc_fast_reg_mr - Allocates memory region usable with the | 
					
						
							|  |  |  |  *   IB_WR_FAST_REG_MR send work request. | 
					
						
							|  |  |  |  * @pd: The protection domain associated with the region. | 
					
						
							|  |  |  |  * @max_page_list_len: requested max physical buffer list length to be | 
					
						
							|  |  |  |  *   used with fast register work requests for this MR. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_alloc_fast_reg_page_list - Allocates a page list array | 
					
						
							|  |  |  |  * @device: ib device pointer. | 
					
						
							|  |  |  |  * @page_list_len: size of the page list array to be allocated. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * This allocates and returns a struct ib_fast_reg_page_list * and a | 
					
						
							|  |  |  |  * page_list array that is at least page_list_len in size.  The actual | 
					
						
							|  |  |  |  * size is returned in max_page_list_len.  The caller is responsible | 
					
						
							|  |  |  |  * for initializing the contents of the page_list array before posting | 
					
						
							|  |  |  |  * a send work request with the IB_WR_FAST_REG_MR opcode. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * The page_list array entries must be translated using one of the | 
					
						
							|  |  |  |  * ib_dma_*() functions just like the addresses passed to | 
					
						
							|  |  |  |  * ib_map_phys_fmr().  Once the ib_post_send() is issued, the struct | 
					
						
							|  |  |  |  * ib_fast_reg_page_list must not be modified by the caller until the | 
					
						
							|  |  |  |  * IB_WC_FAST_REG_MR work request completes. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list( | 
					
						
							|  |  |  | 				struct ib_device *device, int page_list_len); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_free_fast_reg_page_list - Deallocates a previously allocated | 
					
						
							|  |  |  |  *   page list array. | 
					
						
							|  |  |  |  * @page_list: struct ib_fast_reg_page_list pointer to be deallocated. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | void ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_update_fast_reg_key - updates the key portion of the fast_reg MR | 
					
						
							|  |  |  |  *   R_Key and L_Key. | 
					
						
							|  |  |  |  * @mr - struct ib_mr pointer to be updated. | 
					
						
							|  |  |  |  * @newkey - new key to be used. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static inline void ib_update_fast_reg_key(struct ib_mr *mr, u8 newkey) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	mr->lkey = (mr->lkey & 0xffffff00) | newkey; | 
					
						
							|  |  |  | 	mr->rkey = (mr->rkey & 0xffffff00) | newkey; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-02-06 16:19:12 +00:00
										 |  |  | /**
 | 
					
						
							|  |  |  |  * ib_inc_rkey - increments the key portion of the given rkey. Can be used | 
					
						
							|  |  |  |  * for calculating a new rkey for type 2 memory windows. | 
					
						
							|  |  |  |  * @rkey - the rkey to increment. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static inline u32 ib_inc_rkey(u32 rkey) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	const u32 mask = 0x000000ff; | 
					
						
							|  |  |  | 	return ((rkey + 1) & mask) | (rkey & ~mask); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | /**
 | 
					
						
							|  |  |  |  * ib_alloc_mw - Allocates a memory window. | 
					
						
							|  |  |  |  * @pd: The protection domain associated with the memory window. | 
					
						
							| 
									
										
										
										
											2013-02-06 16:19:12 +00:00
										 |  |  |  * @type: The type of the memory window (1 or 2). | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  |  */ | 
					
						
							| 
									
										
										
										
											2013-02-06 16:19:12 +00:00
										 |  |  | struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_bind_mw - Posts a work request to the send queue of the specified | 
					
						
							|  |  |  |  *   QP, which binds the memory window to the given address range and | 
					
						
							|  |  |  |  *   remote access attributes. | 
					
						
							|  |  |  |  * @qp: QP to post the bind work request on. | 
					
						
							|  |  |  |  * @mw: The memory window to bind. | 
					
						
							|  |  |  |  * @mw_bind: Specifies information about the memory window, including | 
					
						
							|  |  |  |  *   its address range, remote access rights, and associated memory region. | 
					
						
							| 
									
										
										
										
											2013-02-06 16:19:12 +00:00
										 |  |  |  * | 
					
						
							|  |  |  |  * If there is no immediate error, the function will update the rkey member | 
					
						
							|  |  |  |  * of the mw parameter to its new value. The bind operation can still fail | 
					
						
							|  |  |  |  * asynchronously. | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  |  */ | 
					
						
							|  |  |  | static inline int ib_bind_mw(struct ib_qp *qp, | 
					
						
							|  |  |  | 			     struct ib_mw *mw, | 
					
						
							|  |  |  | 			     struct ib_mw_bind *mw_bind) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	/* XXX reference counting in corresponding MR? */ | 
					
						
							|  |  |  | 	return mw->device->bind_mw ? | 
					
						
							|  |  |  | 		mw->device->bind_mw(qp, mw, mw_bind) : | 
					
						
							|  |  |  | 		-ENOSYS; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_dealloc_mw - Deallocates a memory window. | 
					
						
							|  |  |  |  * @mw: The memory window to deallocate. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | int ib_dealloc_mw(struct ib_mw *mw); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_alloc_fmr - Allocates an unmapped fast memory region. | 
					
						
							|  |  |  |  * @pd: The protection domain associated with the unmapped region. | 
					
						
							|  |  |  |  * @mr_access_flags: Specifies the memory access rights. | 
					
						
							|  |  |  |  * @fmr_attr: Attributes of the unmapped region. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * A fast memory region must be mapped before it can be used as part of | 
					
						
							|  |  |  |  * a work request. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd, | 
					
						
							|  |  |  | 			    int mr_access_flags, | 
					
						
							|  |  |  | 			    struct ib_fmr_attr *fmr_attr); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_map_phys_fmr - Maps a list of physical pages to a fast memory region. | 
					
						
							|  |  |  |  * @fmr: The fast memory region to associate with the pages. | 
					
						
							|  |  |  |  * @page_list: An array of physical pages to map to the fast memory region. | 
					
						
							|  |  |  |  * @list_len: The number of pages in page_list. | 
					
						
							|  |  |  |  * @iova: The I/O virtual address to use with the mapped region. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static inline int ib_map_phys_fmr(struct ib_fmr *fmr, | 
					
						
							|  |  |  | 				  u64 *page_list, int list_len, | 
					
						
							|  |  |  | 				  u64 iova) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	return fmr->device->map_phys_fmr(fmr, page_list, list_len, iova); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_unmap_fmr - Removes the mapping from a list of fast memory regions. | 
					
						
							|  |  |  |  * @fmr_list: A linked list of fast memory regions to unmap. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | int ib_unmap_fmr(struct list_head *fmr_list); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_dealloc_fmr - Deallocates a fast memory region. | 
					
						
							|  |  |  |  * @fmr: The fast memory region to deallocate. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | int ib_dealloc_fmr(struct ib_fmr *fmr); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_attach_mcast - Attaches the specified QP to a multicast group. | 
					
						
							|  |  |  |  * @qp: QP to attach to the multicast group.  The QP must be type | 
					
						
							|  |  |  |  *   IB_QPT_UD. | 
					
						
							|  |  |  |  * @gid: Multicast group GID. | 
					
						
							|  |  |  |  * @lid: Multicast group LID in host byte order. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * In order to send and receive multicast packets, subnet | 
					
						
							|  |  |  |  * administration must have created the multicast group and configured | 
					
						
							|  |  |  |  * the fabric appropriately.  The port associated with the specified | 
					
						
							|  |  |  |  * QP must also be a member of the multicast group. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_detach_mcast - Detaches the specified QP from a multicast group. | 
					
						
							|  |  |  |  * @qp: QP to detach from the multicast group. | 
					
						
							|  |  |  |  * @gid: Multicast group GID. | 
					
						
							|  |  |  |  * @lid: Multicast group LID in host byte order. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-05-23 17:52:46 -07:00
										 |  |  | /**
 | 
					
						
							|  |  |  |  * ib_alloc_xrcd - Allocates an XRC domain. | 
					
						
							|  |  |  |  * @device: The device on which to allocate the XRC domain. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * ib_dealloc_xrcd - Deallocates an XRC domain. | 
					
						
							|  |  |  |  * @xrcd: The XRC domain to deallocate. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | int ib_dealloc_xrcd(struct ib_xrcd *xrcd); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												IB/core: Add receive flow steering support
The RDMA stack allows for applications to create IB_QPT_RAW_PACKET
QPs, which receive plain Ethernet packets, specifically packets that
don't carry any QPN to be matched by the receiving side.  Applications
using these QPs must be provided with a method to program some
steering rule with the HW so packets arriving at the local port can be
routed to them.
This patch adds ib_create_flow(), which allows providing a flow
specification for a QP.  When there's a match between the
specification and a received packet, the packet is forwarded to that
QP, in the same way one uses ib_attach_mcast() for IB UD
multicast handling.
Flow specifications are provided as instances of struct ib_flow_spec_yyy,
which describe L2, L3 and L4 headers.  Currently specs for Ethernet, IPv4,
TCP and UDP are defined.  Flow specs are made of values and masks.
The input to ib_create_flow() is a struct ib_flow_attr, which contains
a few mandatory control elements and optional flow specs.
    struct ib_flow_attr {
            enum ib_flow_attr_type type;
            u16      size;
            u16      priority;
            u32      flags;
            u8       num_of_specs;
            u8       port;
            /* Following are the optional layers according to user request
             * struct ib_flow_spec_yyy
             * struct ib_flow_spec_zzz
             */
    };
As these specs are eventually coming from user space, they are defined and
used in a way which allows adding new spec types without kernel/user ABI
change, just with a little API enhancement which defines the newly added spec.
The flow spec structures are defined with TLV (Type-Length-Value)
entries, which allows calling ib_create_flow() with a list of variable
length of optional specs.
For the actual processing of ib_flow_attr the driver uses the number
of specs and the size mandatory fields along with the TLV nature of
the specs.
Steering rules processing order is according to the domain over which
the rule is set and the rule priority.  All rules set by user space
applications fall into the IB_FLOW_DOMAIN_USER domain; other domains
could be used by future IPoIB RFS and Ethtool flow-steering interface
implementation.  Lower numerical value for the priority field means
higher priority.
The returned value from ib_create_flow() is a struct ib_flow, which
contains a database pointer (handle) provided by the HW driver to be
used when calling ib_destroy_flow().
Applications that offload TCP/IP traffic can also be written over IB
UD QPs.  The ib_create_flow() / ib_destroy_flow() API is designed to
support UD QPs too.  A HW driver can set IB_DEVICE_MANAGED_FLOW_STEERING
to denote support for flow steering.
The ib_flow_attr enum type supports usage of flow steering for promiscuous
and sniffer purposes:
    IB_FLOW_ATTR_NORMAL - "regular" rule, steering according to rule specification
    IB_FLOW_ATTR_ALL_DEFAULT - default unicast and multicast rule, receive
        all Ethernet traffic which isn't steered to any QP
    IB_FLOW_ATTR_MC_DEFAULT - same as IB_FLOW_ATTR_ALL_DEFAULT but only for multicast
    IB_FLOW_ATTR_SNIFFER - sniffer rule, receive all port traffic
ALL_DEFAULT and MC_DEFAULT rule options are valid only for Ethernet link type.
Signed-off-by: Hadar Hen Zion <hadarh@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
											
										 
											2013-08-07 14:01:59 +03:00
										 |  |  | struct ib_flow *ib_create_flow(struct ib_qp *qp, | 
					
						
							|  |  |  | 			       struct ib_flow_attr *flow_attr, int domain); | 
					
						
							|  |  |  | int ib_destroy_flow(struct ib_flow *flow_id); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-31 15:26:32 +02:00
										 |  |  | static inline int ib_check_mr_access(int flags) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	/*
 | 
					
						
							|  |  |  | 	 * Local write permission is required if remote write or | 
					
						
							|  |  |  | 	 * remote atomic permission is also requested. | 
					
						
							|  |  |  | 	 */ | 
					
						
							|  |  |  | 	if (flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) && | 
					
						
							|  |  |  | 	    !(flags & IB_ACCESS_LOCAL_WRITE)) | 
					
						
							|  |  |  | 		return -EINVAL; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | #endif /* IB_VERBS_H */
 |